Fix bug #45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@682843 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-08-05 18:05:29 +00:00
parent 4cc78b0c0e
commit 2f42314b55
4 changed files with 54 additions and 15 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.5.1-beta2" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
<action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5.1-beta2" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
<action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>

View File

@ -16,18 +16,20 @@
==================================================================== */
package org.apache.poi.xslf.extractor;
import java.io.File;
import java.io.IOException;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
@ -35,17 +37,20 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
private XSLFSlideShow slideshow;
private XMLSlideShow slideshow;
private boolean slidesByDefault = true;
private boolean notesByDefault = false;
/**
 * Creates an extractor over the given XMLSlideShow.
 * The XSLFSlideShow returned by its _getXSLFSlideShow() method is
 *  passed to the parent constructor, and the XMLSlideShow itself
 *  is stored in the slideshow field.
 * @param slideshow the XMLSlideShow to extract text from
 */
public XSLFPowerPointExtractor(XMLSlideShow slideshow) {
super(slideshow._getXSLFSlideShow());
this.slideshow = slideshow;
}
/**
 * Creates an extractor from a XSLFSlideShow, by wrapping it in a
 *  new XMLSlideShow and delegating to the XMLSlideShow constructor.
 * @param slideshow the XSLFSlideShow to wrap
 * @throws XmlException declared because building the XMLSlideShow wrapper may throw it
 * @throws IOException declared because building the XMLSlideShow wrapper may throw it
 */
public XSLFPowerPointExtractor(XSLFSlideShow slideshow) throws XmlException, IOException {
this(new XMLSlideShow(slideshow));
}
/**
 * Creates an extractor from a Package, by building a XSLFSlideShow
 *  from the container and delegating to the XSLFSlideShow constructor.
 * @param container the Package to open as a XSLFSlideShow
 * @throws XmlException declared because the delegated construction may throw it
 * @throws OpenXML4JException declared because the delegated construction may throw it
 * @throws IOException declared because the delegated construction may throw it
 */
public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
this(new XSLFSlideShow(container));
}
public XSLFPowerPointExtractor(XSLFSlideShow slideshow) {
super(slideshow);
this.slideshow = slideshow;
}
public static void main(String[] args) throws Exception {
if(args.length < 1) {
@ -88,18 +93,32 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
*/
public String getText(boolean slideText, boolean notesText) {
StringBuffer text = new StringBuffer();
CTSlideIdListEntry[] slideRefs =
slideshow.getSlideReferences().getSldIdArray();
for (int i = 0; i < slideRefs.length; i++) {
XSLFSlide[] slides = slideshow.getSlides();
for(int i = 0; i < slides.length; i++) {
CTSlide rawSlide = slides[i]._getCTSlide();
CTSlideIdListEntry slideId = slides[i]._getCTSlideId();
try {
CTSlide slide =
slideshow.getSlide(slideRefs[i]);
// For now, still very low level
CTNotesSlide notes =
slideshow.getNotes(slideRefs[i]);
slideshow._getXSLFSlideShow().getNotes(slideId);
CTCommentList comments =
slideshow._getXSLFSlideShow().getSlideComments(slideId);
if(slideText) {
extractText(slide.getCSld().getSpTree(), text);
extractText(rawSlide.getCSld().getSpTree(), text);
// Comments too for the slide
if(comments != null) {
for(CTComment comment : comments.getCmArray()) {
// TODO - comment authors too
// (They're in another stream)
text.append(
comment.getText() + "\n"
);
}
}
}
if(notesText && notes != null) {
extractText(notes.getCSld().getSpTree(), text);

View File

@ -108,4 +108,22 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"\n\n\n\n", text
);
}
/**
 * Checks that the text extracted from the 45545_Comment.pptx
 *  sample file is non-empty and contains the expected word.
 */
public void testGetComments() throws Exception {
    // Locate the sample document under the test data directory
    String samplePath =
        System.getProperty("HSLF.testdata.path") +
        File.separator + "45545_Comment.pptx";
    File sample = new File(samplePath);
    assertTrue(sample.exists());

    // Open the slideshow and build an extractor over it
    xmlA = new XSLFSlideShow(sample.toString());
    XSLFPowerPointExtractor commentExtractor = new XSLFPowerPointExtractor(xmlA);

    String extracted = commentExtractor.getText();
    assertTrue(extracted.length() > 0);

    // Check comments are there
    assertTrue(
        "Unable to find expected word in text\n" + extracted,
        extracted.contains("testdoc")
    );
}
}