Fix bug #45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@682843 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4cc78b0c0e
commit
2f42314b55
@ -37,6 +37,7 @@
|
|||||||
|
|
||||||
<!-- Don't forget to update status.xml too! -->
|
<!-- Don't forget to update status.xml too! -->
|
||||||
<release version="3.5.1-beta2" date="2008-??-??">
|
<release version="3.5.1-beta2" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>
|
<action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
<!-- Don't forget to update changes.xml too! -->
|
<!-- Don't forget to update changes.xml too! -->
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.5.1-beta2" date="2008-??-??">
|
<release version="3.5.1-beta2" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>
|
<action dev="POI-DEVELOPERS" type="add">New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from</action>
|
||||||
|
@ -16,18 +16,20 @@
|
|||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.xslf.extractor;
|
package org.apache.poi.xslf.extractor;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLDocument;
|
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
import org.apache.poi.xslf.XSLFSlideShow;
|
import org.apache.poi.xslf.XSLFSlideShow;
|
||||||
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
|
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.openxml4j.exceptions.OpenXML4JException;
|
import org.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.openxml4j.opc.Package;
|
import org.openxml4j.opc.Package;
|
||||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
|
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
|
||||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
|
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
|
||||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
|
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
|
||||||
|
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
|
||||||
|
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
|
||||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
|
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
|
||||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
|
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
|
||||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
|
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
|
||||||
@ -35,17 +37,20 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
|
|||||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
|
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
|
||||||
|
|
||||||
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||||
private XSLFSlideShow slideshow;
|
private XMLSlideShow slideshow;
|
||||||
private boolean slidesByDefault = true;
|
private boolean slidesByDefault = true;
|
||||||
private boolean notesByDefault = false;
|
private boolean notesByDefault = false;
|
||||||
|
|
||||||
|
public XSLFPowerPointExtractor(XMLSlideShow slideshow) {
|
||||||
|
super(slideshow._getXSLFSlideShow());
|
||||||
|
this.slideshow = slideshow;
|
||||||
|
}
|
||||||
|
public XSLFPowerPointExtractor(XSLFSlideShow slideshow) throws XmlException, IOException {
|
||||||
|
this(new XMLSlideShow(slideshow));
|
||||||
|
}
|
||||||
public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(new XSLFSlideShow(container));
|
this(new XSLFSlideShow(container));
|
||||||
}
|
}
|
||||||
public XSLFPowerPointExtractor(XSLFSlideShow slideshow) {
|
|
||||||
super(slideshow);
|
|
||||||
this.slideshow = slideshow;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
if(args.length < 1) {
|
if(args.length < 1) {
|
||||||
@ -89,17 +94,31 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
|||||||
public String getText(boolean slideText, boolean notesText) {
|
public String getText(boolean slideText, boolean notesText) {
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
|
|
||||||
CTSlideIdListEntry[] slideRefs =
|
XSLFSlide[] slides = slideshow.getSlides();
|
||||||
slideshow.getSlideReferences().getSldIdArray();
|
for(int i = 0; i < slides.length; i++) {
|
||||||
for (int i = 0; i < slideRefs.length; i++) {
|
CTSlide rawSlide = slides[i]._getCTSlide();
|
||||||
|
CTSlideIdListEntry slideId = slides[i]._getCTSlideId();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
CTSlide slide =
|
// For now, still very low level
|
||||||
slideshow.getSlide(slideRefs[i]);
|
|
||||||
CTNotesSlide notes =
|
CTNotesSlide notes =
|
||||||
slideshow.getNotes(slideRefs[i]);
|
slideshow._getXSLFSlideShow().getNotes(slideId);
|
||||||
|
CTCommentList comments =
|
||||||
|
slideshow._getXSLFSlideShow().getSlideComments(slideId);
|
||||||
|
|
||||||
if(slideText) {
|
if(slideText) {
|
||||||
extractText(slide.getCSld().getSpTree(), text);
|
extractText(rawSlide.getCSld().getSpTree(), text);
|
||||||
|
|
||||||
|
// Comments too for the slide
|
||||||
|
if(comments != null) {
|
||||||
|
for(CTComment comment : comments.getCmArray()) {
|
||||||
|
// TODO - comment authors too
|
||||||
|
// (They're in another stream)
|
||||||
|
text.append(
|
||||||
|
comment.getText() + "\n"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if(notesText && notes != null) {
|
if(notesText && notes != null) {
|
||||||
extractText(notes.getCSld().getSpTree(), text);
|
extractText(notes.getCSld().getSpTree(), text);
|
||||||
|
@ -108,4 +108,22 @@ public class TestXSLFPowerPointExtractor extends TestCase {
|
|||||||
"\n\n\n\n", text
|
"\n\n\n\n", text
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGetComments() throws Exception {
|
||||||
|
File file = new File(
|
||||||
|
System.getProperty("HSLF.testdata.path") +
|
||||||
|
File.separator + "45545_Comment.pptx"
|
||||||
|
);
|
||||||
|
assertTrue(file.exists());
|
||||||
|
|
||||||
|
xmlA = new XSLFSlideShow(file.toString());
|
||||||
|
XSLFPowerPointExtractor extractor =
|
||||||
|
new XSLFPowerPointExtractor(xmlA);
|
||||||
|
|
||||||
|
String text = extractor.getText();
|
||||||
|
assertTrue(text.length() > 0);
|
||||||
|
|
||||||
|
// Check comments are there
|
||||||
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user