Update XSLFPowerPointExtractor to include comment authors along with comment text

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1165112 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2011-09-04 21:16:32 +00:00
parent 27304042f8
commit e3a6f2ad80
3 changed files with 21 additions and 5 deletions

View File

@ -34,6 +34,9 @@
<changes> <changes>
<release version="3.8-beta5" date="2011-??-??"> <release version="3.8-beta5" date="2011-??-??">
<action dev="poi-developers" type="add">XSLFPowerPointExtractor support for including comment authors with comment text</action>
<action dev="poi-developers" type="fix">Converted XSLFPowerPointExtractor to use UserModel for all text extraction</action>
<action dev="poi-developers" type="add">XSLF initial UserModel support for Notes and Comments for Slides</action>
</release> </release>
<release version="3.8-beta4" date="2011-08-26"> <release version="3.8-beta4" date="2011-08-26">
<action dev="poi-developers" type="fix">51678 - Extracting text from Bug51524.zip is slow</action> <action dev="poi-developers" type="fix">51678 - Extracting text from Bug51524.zip is slow</action>

View File

@ -24,6 +24,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xslf.XSLFSlideShow; import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.usermodel.DrawingParagraph; import org.apache.poi.xslf.usermodel.DrawingParagraph;
import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
import org.apache.poi.xslf.usermodel.XSLFComments; import org.apache.poi.xslf.usermodel.XSLFComments;
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData; import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
import org.apache.poi.xslf.usermodel.XSLFNotes; import org.apache.poi.xslf.usermodel.XSLFNotes;
@ -31,6 +32,7 @@ import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment; import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthor;
public class XSLFPowerPointExtractor extends POIXMLTextExtractor { public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[] { public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[] {
@ -97,6 +99,7 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
XSLFSlide[] slides = slideshow.getSlides(); XSLFSlide[] slides = slideshow.getSlides();
XSLFCommentAuthors commentAuthors = slideshow.getCommentAuthors();
for (XSLFSlide slide : slides) { for (XSLFSlide slide : slides) {
try { try {
@ -112,11 +115,17 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
// If the slide has comments, do those too // If the slide has comments, do those too
if (comments != null) { if (comments != null) {
for (CTComment comment : comments.getCTCommentsList().getCmList()) { for (CTComment comment : comments.getCTCommentsList().getCmList()) {
// TODO - comment authors too // Do the author if we can
// (They're in another stream) if (commentAuthors != null) {
text.append( CTCommentAuthor author = commentAuthors.getAuthorById(comment.getAuthorId());
comment.getText() + "\n" if(author != null) {
); text.append(author.getName() + ": ");
}
}
// Then the comment text, with a new line afterwards
text.append(comment.getText());
text.append("\n");
} }
} }
} }

View File

@ -112,6 +112,10 @@ public class TestXSLFPowerPointExtractor extends TestCase {
// Check comments are there // Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
// Check the authors came through too
assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
} }
public void testTable() throws Exception { public void testTable() throws Exception {