Fix bug #51804 - Include Master Slide text in XSLF text extraction

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1173749 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2011-09-21 16:32:52 +00:00
parent 36da306131
commit 2c9a15dc3b
3 changed files with 76 additions and 33 deletions

View File

@ -34,6 +34,7 @@
<changes> <changes>
<release version="3.8-beta5" date="2011-??-??"> <release version="3.8-beta5" date="2011-??-??">
<action dev="poi-developers" type="add">51804 - include Master Slide text in XSLF text extraction</action>
<action dev="poi-developers" type="add">New PackagePart method getRelatedPart(PackageRelationship) to simplify navigation of relations between OPC Parts</action> <action dev="poi-developers" type="add">New PackagePart method getRelatedPart(PackageRelationship) to simplify navigation of relations between OPC Parts</action>
<action dev="poi-developers" type="fix">51832 - handle XLS files where the WRITEPROTECT record precedes the FILEPASS one, rather than following as normal</action> <action dev="poi-developers" type="fix">51832 - handle XLS files where the WRITEPROTECT record precedes the FILEPASS one, rather than following as normal</action>
<action dev="poi-developers" type="fix">51809 - correct GTE handling in COUNTIF</action> <action dev="poi-developers" type="fix">51809 - correct GTE handling in COUNTIF</action>

View File

@ -30,6 +30,7 @@ import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
import org.apache.poi.xslf.usermodel.XSLFNotes; import org.apache.poi.xslf.usermodel.XSLFNotes;
import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFSlideMaster;
import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment; import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthor; import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthor;
@ -105,12 +106,19 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
try { try {
XSLFNotes notes = slide.getNotes(); XSLFNotes notes = slide.getNotes();
XSLFComments comments = slide.getComments(); XSLFComments comments = slide.getComments();
XSLFSlideMaster master = slide.getMasterSheet();
// TODO Do the slide's name // TODO Do the slide's name
// (Stored in docProps/app.xml)
// Do the slide's text if requested // Do the slide's text if requested
if (slideText) { if (slideText) {
extractText(slide.getCommonSlideData(), text); extractText(slide.getCommonSlideData(), text);
// If there's a master sheet, grab text from there
if(master != null) {
extractText(master.getCommonSlideData(), text);
}
// If the slide has comments, do those too // If the slide has comments, do those too
if (comments != null) { if (comments != null) {

View File

@ -26,17 +26,19 @@ import junit.framework.TestCase;
* Tests for XSLFPowerPointExtractor * Tests for XSLFPowerPointExtractor
*/ */
public class TestXSLFPowerPointExtractor extends TestCase { public class TestXSLFPowerPointExtractor extends TestCase {
/** /**
* A simple file * A simple file
*/ */
private XSLFSlideShow xmlA; private XSLFSlideShow xmlA;
private OPCPackage pkg; private OPCPackage pkg;
private POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
protected void setUp() throws Exception { protected void setUp() throws Exception {
POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); slTests = POIDataSamples.getSlideShowInstance();
pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx")); pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
xmlA = new XSLFSlideShow(pkg); xmlA = new XSLFSlideShow(pkg);
} }
/** /**
* Get text out of the simple file * Get text out of the simple file
@ -54,7 +56,16 @@ public class TestXSLFPowerPointExtractor extends TestCase {
// Check Basics // Check Basics
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n")); assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
assertTrue(text.endsWith("amet\n\n")); assertTrue(text.contains("amet\n\n"));
// Our master text, for tests
String masterText =
"Click to edit Master title style\n" +
"Click to edit Master text styles\n" +
"Second level\n" +
"Third level\n" +
"Fourth level\n" +
"Fifth level\n";
// Just slides, no notes // Just slides, no notes
text = extractor.getText(true, false); text = extractor.getText(true, false);
@ -62,13 +73,18 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" + "\n" +
masterText +
"\n\n\n" +
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Lorem\n" + "Lorem\n" +
"ipsum\n" + "ipsum\n" +
"dolor\n" + "dolor\n" +
"sit\n" + "sit\n" +
"amet\n" + "amet\n" +
"\n", text "\n" +
masterText +
"\n\n\n"
, text
); );
// Just notes, no slides // Just notes, no slides
@ -82,14 +98,19 @@ public class TestXSLFPowerPointExtractor extends TestCase {
assertEquals( assertEquals(
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n\n\n" + "\n" +
masterText +
"\n\n\n\n\n" +
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Lorem\n" + "Lorem\n" +
"ipsum\n" + "ipsum\n" +
"dolor\n" + "dolor\n" +
"sit\n" + "sit\n" +
"amet\n" + "amet\n" +
"\n\n\n", text "\n" +
masterText +
"\n\n\n\n\n"
, text
); );
// Via set defaults // Via set defaults
@ -101,28 +122,41 @@ public class TestXSLFPowerPointExtractor extends TestCase {
); );
} }
public void testGetComments() throws Exception { public void testGetComments() throws Exception {
POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); XSLFSlideShow xml =
xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx"))); new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
XSLFPowerPointExtractor extractor = XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA); new XSLFPowerPointExtractor(xml);
String text = extractor.getText(); String text = extractor.getText();
assertTrue(text.length() > 0); assertTrue(text.length() > 0);
// Check comments are there // Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
// Check the authors came through too // Check the authors came through too
assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01")); assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
}
public void testGetMasterText() throws Exception {
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xml);
String text = extractor.getText();
assertTrue(text.length() > 0);
// Check master text is there
assertTrue("Unable to find expected word in text\n" + text, text.contains("Master footer is here"));
} }
public void testTable() throws Exception { public void testTable() throws Exception {
POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); XSLFSlideShow xml =
xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx"))); new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
XSLFPowerPointExtractor extractor = XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA); new XSLFPowerPointExtractor(xml);
String text = extractor.getText(); String text = extractor.getText();
assertTrue(text.length() > 0); assertTrue(text.length() > 0);
@ -137,7 +171,6 @@ public class TestXSLFPowerPointExtractor extends TestCase {
* well as from the normal file * well as from the normal file
*/ */
public void testDifferentSubformats() throws Exception { public void testDifferentSubformats() throws Exception {
POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
String[] extensions = new String[] { String[] extensions = new String[] {
"pptx", "pptm", "ppsm", "ppsx", "pptx", "pptm", "ppsm", "ppsx",
"thmx", "thmx",
@ -145,9 +178,10 @@ public class TestXSLFPowerPointExtractor extends TestCase {
}; };
for(String extension : extensions) { for(String extension : extensions) {
String filename = "testPPT." + extension; String filename = "testPPT." + extension;
xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename))); XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
XSLFPowerPointExtractor extractor = XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA); new XSLFPowerPointExtractor(xml);
String text = extractor.getText(); String text = extractor.getText();
if(extension.equals("thmx")) { if(extension.equals("thmx")) {