diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 350f4b255..ef7ad52d5 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,7 +34,7 @@ - 51804 - include Master Slide text in XSLF text extraction + 51804 - optionally include Master Slide text in XSLF text extraction, as HSLF already offers New PackagePart method getRelatedPart(PackageRelationship) to simplify navigation of relations between OPC Parts 51832 - handle XLS files where the WRITEPROTECT record preceeds the FILEPASS one, rather than following as normal 51809 - correct GTE handling in COUNTIF diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java index 930d16142..9563f664a 100644 --- a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java +++ b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java @@ -45,6 +45,7 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { private XMLSlideShow slideshow; private boolean slidesByDefault = true; private boolean notesByDefault = false; + private boolean masterByDefault = false; public XSLFPowerPointExtractor(XMLSlideShow slideshow) { super(slideshow); @@ -84,6 +85,13 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { this.notesByDefault = notesByDefault; } + /** + * Should a call to getText() return text from master? Default is no + */ + public void setMasterByDefault(boolean masterByDefault) { + this.masterByDefault = masterByDefault; + } + /** * Gets the slide text, but not the notes text */ @@ -97,6 +105,16 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { * @param notesText Should we retrieve text from notes? */ public String getText(boolean slideText, boolean notesText) { + return getText(slideText, notesText, masterByDefault); + } + + /** + * Gets the requested text from the file + * @param slideText Should we retrieve text from slides? + * @param notesText Should we retrieve text from notes? + * @param masterText Should we retrieve text from master slides? + */ + public String getText(boolean slideText, boolean notesText, boolean masterText) { StringBuffer text = new StringBuffer(); XSLFSlide[] slides = slideshow.getSlides(); @@ -115,8 +133,8 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { if (slideText) { extractText(slide.getCommonSlideData(), text); - // If there's a master sheet, grab text from there - if(master != null) { + // If there's a master sheet and it's requested, grab text from there + if(masterText && master != null) { extractText(master.getCommonSlideData(), text); } diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java index 2b575fe56..cfaf344be 100644 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java @@ -68,22 +68,18 @@ public class TestXSLFPowerPointExtractor extends TestCase { "Fifth level\n"; // Just slides, no notes - text = extractor.getText(true, false); + text = extractor.getText(true, false, false); assertEquals( "Lorem ipsum dolor sit amet\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + "\n" + - masterText + - "\n\n\n" + "Lorem ipsum dolor sit amet\n" + "Lorem\n" + "ipsum\n" + "dolor\n" + "sit\n" + "amet\n" + - "\n" + - masterText + - "\n\n\n" + "\n" , text ); @@ -94,25 +90,61 @@ public class TestXSLFPowerPointExtractor extends TestCase { ); // Both - text = extractor.getText(true, true); + text = extractor.getText(true, true, false); assertEquals( "Lorem ipsum dolor sit amet\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + - "\n" + - masterText + - "\n\n\n\n\n" + + "\n\n\n" + "Lorem ipsum dolor sit amet\n" + "Lorem\n" + "ipsum\n" + "dolor\n" + "sit\n" + "amet\n" + - "\n" + - masterText + - "\n\n\n\n\n" + "\n\n\n" , text ); + // With Slides and Master Text + text = extractor.getText(true, false, true); + assertEquals( + "Lorem ipsum dolor sit amet\n" + + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + + "\n" + + masterText + + "\n\n\n" + + "Lorem ipsum dolor sit amet\n" + + "Lorem\n" + + "ipsum\n" + + "dolor\n" + + "sit\n" + + "amet\n" + + "\n" + + masterText + + "\n\n\n" + , text + ); + + // With Slides, Notes and Master Text + text = extractor.getText(true, true, true); + assertEquals( + "Lorem ipsum dolor sit amet\n" + + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + + "\n" + + masterText + + "\n\n\n\n\n" + + "Lorem ipsum dolor sit amet\n" + + "Lorem\n" + + "ipsum\n" + + "dolor\n" + + "sit\n" + + "amet\n" + + "\n" + + masterText + + "\n\n\n\n\n" + , text + ); + // Via set defaults extractor.setSlidesByDefault(false); extractor.setNotesByDefault(true); diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index 239df9a69..8a195ac25 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -39,14 +39,14 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * @author Nick Burch */ public final class PowerPointExtractor extends POIOLE2TextExtractor { - private HSLFSlideShow _hslfshow; - private SlideShow _show; - private Slide[] _slides; + private HSLFSlideShow _hslfshow; + private SlideShow _show; + private Slide[] _slides; - private boolean _slidesByDefault = true; - private boolean _notesByDefault = false; - private boolean _commentsByDefault = false; - private boolean _masterByDefault = false; + private boolean _slidesByDefault = true; + private boolean _notesByDefault = false; + private boolean _commentsByDefault = false; + private boolean _masterByDefault = false; /** * Basic extractor. Returns all the text, and optionally all the notes