From 2c9a15dc3b099cb4a0c27b24bbb2c6fe4140838f Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 21 Sep 2011 16:32:52 +0000 Subject: [PATCH] Fix bug #51804 - Include Master Slide text in XSLF text extraction git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1173749 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../extractor/XSLFPowerPointExtractor.java | 8 ++ .../TestXSLFPowerPointExtractor.java | 100 ++++++++++++------ 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 507b79199..350f4b255 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 51804 - include Master Slide text in XSLF text extraction New PackagePart method getRelatedPart(PackageRelationship) to simplify navigation of relations between OPC Parts 51832 - handle XLS files where the WRITEPROTECT record preceeds the FILEPASS one, rather than following as normal 51809 - correct GTE handling in COUNTIF diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java index 536a34bfb..930d16142 100644 --- a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java +++ b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java @@ -30,6 +30,7 @@ import org.apache.poi.xslf.usermodel.XSLFCommonSlideData; import org.apache.poi.xslf.usermodel.XSLFNotes; import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.xslf.usermodel.XSLFSlide; +import org.apache.poi.xslf.usermodel.XSLFSlideMaster; import org.apache.xmlbeans.XmlException; import org.openxmlformats.schemas.presentationml.x2006.main.CTComment; import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthor; @@ -105,12 +106,19 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { try { XSLFNotes notes = slide.getNotes(); XSLFComments comments = slide.getComments(); + XSLFSlideMaster master = slide.getMasterSheet(); // TODO Do the slide's name + // (Stored in docProps/app.xml) // Do the slide's text if requested if (slideText) { extractText(slide.getCommonSlideData(), text); + + // If there's a master sheet, grab text from there + if(master != null) { + extractText(master.getCommonSlideData(), text); + } // If the slide has comments, do those too if (comments != null) { diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java index 8cf2afbe6..2b575fe56 100644 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java @@ -26,17 +26,19 @@ import junit.framework.TestCase; * Tests for HXFPowerPointExtractor */ public class TestXSLFPowerPointExtractor extends TestCase { - /** - * A simple file - */ - private XSLFSlideShow xmlA; - private OPCPackage pkg; + /** + * A simple file + */ + private XSLFSlideShow xmlA; + private OPCPackage pkg; + + private POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); - protected void setUp() throws Exception { - POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); - pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx")); - xmlA = new XSLFSlideShow(pkg); - } + protected void setUp() throws Exception { + slTests = POIDataSamples.getSlideShowInstance(); + pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx")); + xmlA = new XSLFSlideShow(pkg); + } /** * Get text out of the simple file @@ -54,7 +56,16 @@ public class TestXSLFPowerPointExtractor extends TestCase { // Check Basics assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n")); - assertTrue(text.endsWith("amet\n\n")); + assertTrue(text.contains("amet\n\n")); + + // Our master text, for tests + String masterText = + "Click to edit Master title style\n" + + "Click to edit Master text styles\n" + + "Second level\n" + + "Third level\n" + + "Fourth level\n" + + "Fifth level\n"; // Just slides, no notes text = extractor.getText(true, false); @@ -62,13 +73,18 @@ public class TestXSLFPowerPointExtractor extends TestCase { "Lorem ipsum dolor sit amet\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + "\n" + + masterText + + "\n\n\n" + "Lorem ipsum dolor sit amet\n" + "Lorem\n" + "ipsum\n" + "dolor\n" + "sit\n" + "amet\n" + - "\n", text + "\n" + + masterText + + "\n\n\n" + , text ); // Just notes, no slides @@ -82,14 +98,19 @@ public class TestXSLFPowerPointExtractor extends TestCase { assertEquals( "Lorem ipsum dolor sit amet\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + - "\n\n\n" + + "\n" + + masterText + + "\n\n\n\n\n" + "Lorem ipsum dolor sit amet\n" + "Lorem\n" + "ipsum\n" + "dolor\n" + "sit\n" + "amet\n" + - "\n\n\n", text + "\n" + + masterText + + "\n\n\n\n\n" + , text ); // Via set defaults @@ -101,28 +122,41 @@ public class TestXSLFPowerPointExtractor extends TestCase { ); } - public void testGetComments() throws Exception { - POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); - xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx"))); - XSLFPowerPointExtractor extractor = - new XSLFPowerPointExtractor(xmlA); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Check comments are there - assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); + public void testGetComments() throws Exception { + XSLFSlideShow xml = + new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx"))); + XSLFPowerPointExtractor extractor = + new XSLFPowerPointExtractor(xml); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check comments are there + assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); - - // Check the authors came through too + + // Check the authors came through too assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01")); + } + + public void testGetMasterText() throws Exception { + XSLFSlideShow xml = + new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx"))); + XSLFPowerPointExtractor extractor = + new XSLFPowerPointExtractor(xml); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check master text is there + assertTrue("Unable to find expected word in text\n" + text, text.contains("Master footer is here")); } public void testTable() throws Exception { - POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); - xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx"))); + XSLFSlideShow xml = + new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx"))); XSLFPowerPointExtractor extractor = - new XSLFPowerPointExtractor(xmlA); + new XSLFPowerPointExtractor(xml); String text = extractor.getText(); assertTrue(text.length() > 0); @@ -137,7 +171,6 @@ public class TestXSLFPowerPointExtractor extends TestCase { * well as from the normal file */ public void testDifferentSubformats() throws Exception { - POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); String[] extensions = new String[] { "pptx", "pptm", "ppsm", "ppsx", "thmx", @@ -145,9 +178,10 @@ public class TestXSLFPowerPointExtractor extends TestCase { }; for(String extension : extensions) { String filename = "testPPT." + extension; - xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename))); + XSLFSlideShow xml = + new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename))); XSLFPowerPointExtractor extractor = - new XSLFPowerPointExtractor(xmlA); + new XSLFPowerPointExtractor(xml); String text = extractor.getText(); if(extension.equals("thmx")) {