Fix bug #51804 - Include Master Slide text in XSLF text extraction

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1173749 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2011-09-21 16:32:52 +00:00
parent 36da306131
commit 2c9a15dc3b
3 changed files with 76 additions and 33 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta5" date="2011-??-??">
<action dev="poi-developers" type="add">51804 - include Master Slide text in XSLF text extraction</action>
<action dev="poi-developers" type="add">New PackagePart method getRelatedPart(PackageRelationship) to simplify navigation of relations between OPC Parts</action>
<action dev="poi-developers" type="fix">51832 - handle XLS files where the WRITEPROTECT record precedes the FILEPASS one, rather than following as normal</action>
<action dev="poi-developers" type="fix">51809 - correct GTE handling in COUNTIF</action>

View File

@ -30,6 +30,7 @@ import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
import org.apache.poi.xslf.usermodel.XSLFNotes;
import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFSlideMaster;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthor;
@ -105,12 +106,19 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
try {
XSLFNotes notes = slide.getNotes();
XSLFComments comments = slide.getComments();
XSLFSlideMaster master = slide.getMasterSheet();
// TODO Do the slide's name
// (Stored in docProps/app.xml)
// Do the slide's text if requested
if (slideText) {
extractText(slide.getCommonSlideData(), text);
// If there's a master sheet, grab text from there
if(master != null) {
extractText(master.getCommonSlideData(), text);
}
// If the slide has comments, do those too
if (comments != null) {

View File

@ -26,17 +26,19 @@ import junit.framework.TestCase;
* Tests for XSLFPowerPointExtractor
*/
public class TestXSLFPowerPointExtractor extends TestCase {
/**
* A simple file
*/
private XSLFSlideShow xmlA;
private OPCPackage pkg;
/**
* A simple file
*/
private XSLFSlideShow xmlA;
private OPCPackage pkg;
private POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
protected void setUp() throws Exception {
POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
xmlA = new XSLFSlideShow(pkg);
}
protected void setUp() throws Exception {
slTests = POIDataSamples.getSlideShowInstance();
pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
xmlA = new XSLFSlideShow(pkg);
}
/**
* Get text out of the simple file
@ -54,7 +56,16 @@ public class TestXSLFPowerPointExtractor extends TestCase {
// Check Basics
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
assertTrue(text.endsWith("amet\n\n"));
assertTrue(text.contains("amet\n\n"));
// Our master text, for tests
String masterText =
"Click to edit Master title style\n" +
"Click to edit Master text styles\n" +
"Second level\n" +
"Third level\n" +
"Fourth level\n" +
"Fifth level\n";
// Just slides, no notes
text = extractor.getText(true, false);
@ -62,13 +73,18 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
masterText +
"\n\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n", text
"\n" +
masterText +
"\n\n\n"
, text
);
// Just notes, no slides
@ -82,14 +98,19 @@ public class TestXSLFPowerPointExtractor extends TestCase {
assertEquals(
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n\n\n" +
"\n" +
masterText +
"\n\n\n\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n\n\n", text
"\n" +
masterText +
"\n\n\n\n\n"
, text
);
// Via set defaults
@ -101,28 +122,41 @@ public class TestXSLFPowerPointExtractor extends TestCase {
);
}
public void testGetComments() throws Exception {
POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA);
String text = extractor.getText();
assertTrue(text.length() > 0);
// Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
public void testGetComments() throws Exception {
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xml);
String text = extractor.getText();
assertTrue(text.length() > 0);
// Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
// Check the authors came through too
// Check the authors came through too
assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
}
/**
 * Opens the "WithMaster.pptx" sample, runs it through
 * {@link XSLFPowerPointExtractor#getText()} and verifies that the
 * result is non-empty and contains the phrase "Master footer is here".
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
public void testGetMasterText() throws Exception {
    // Build the slideshow straight from the sample resource stream
    OPCPackage masterPackage = OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx"));
    XSLFSlideShow slideShow = new XSLFSlideShow(masterPackage);
    XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(slideShow);

    String extracted = extractor.getText();
    assertTrue(extracted.length() > 0);

    // The master-slide phrase must appear in the extracted text;
    // on failure the full text is included in the message
    boolean hasMasterFooter = extracted.contains("Master footer is here");
    assertTrue("Unable to find expected word in text\n" + extracted, hasMasterFooter);
}
public void testTable() throws Exception {
POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA);
new XSLFPowerPointExtractor(xml);
String text = extractor.getText();
assertTrue(text.length() > 0);
@ -137,7 +171,6 @@ public class TestXSLFPowerPointExtractor extends TestCase {
* well as from the normal file
*/
public void testDifferentSubformats() throws Exception {
POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
String[] extensions = new String[] {
"pptx", "pptm", "ppsm", "ppsx",
"thmx",
@ -145,9 +178,10 @@ public class TestXSLFPowerPointExtractor extends TestCase {
};
for(String extension : extensions) {
String filename = "testPPT." + extension;
xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA);
new XSLFPowerPointExtractor(xml);
String text = extractor.getText();
if(extension.equals("thmx")) {