diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hslf/extractor/HXFPowerPointExtractor.java b/src/scratchpad/ooxml-src/org/apache/poi/hslf/extractor/HXFPowerPointExtractor.java index b0e736401..1d4b1a2bd 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hslf/extractor/HXFPowerPointExtractor.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hslf/extractor/HXFPowerPointExtractor.java @@ -37,6 +37,8 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; public class HXFPowerPointExtractor extends POIXMLTextExtractor { private HSLFXMLSlideShow slideshow; + private boolean slidesByDefault = true; + private boolean notesByDefault = false; public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException { this(new HSLFXMLSlideShow( @@ -60,12 +62,27 @@ public class HXFPowerPointExtractor extends POIXMLTextExtractor { )); System.out.println(extractor.getText()); } + + /** + * Should a call to getText() return slide text? + * Default is yes + */ + public void setSlidesByDefault(boolean slidesByDefault) { + this.slidesByDefault = slidesByDefault; + } + /** + * Should a call to getText() return notes text? 
+ * Default is no + */ + public void setNotesByDefault(boolean notesByDefault) { + this.notesByDefault = notesByDefault; + } /** - * Gets the slide and notes text + * Gets the slide text, but not the notes text, unless setSlidesByDefault() and setNotesByDefault() have been called to change this */ public String getText() { - return getText(true, true); + return getText(slidesByDefault, notesByDefault); } /** diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/extractor/TestHXFPowerPointExtractor.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/extractor/TestHXFPowerPointExtractor.java index 7c96c2986..6a006ab5c 100644 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/extractor/TestHXFPowerPointExtractor.java +++ b/src/scratchpad/ooxml-testcases/org/apache/poi/hslf/extractor/TestHXFPowerPointExtractor.java @@ -60,7 +60,7 @@ public class TestHXFPowerPointExtractor extends TestCase { // Check Basics assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n")); - assertTrue(text.endsWith("amet\n\n\n\n")); + assertTrue(text.endsWith("amet\n\n")); // Just slides, no notes text = extractor.getText(true, false); @@ -97,5 +97,13 @@ public class TestHXFPowerPointExtractor extends TestCase { "amet\n" + "\n\n\n", text ); + + // Via set defaults + extractor.setSlidesByDefault(false); + extractor.setNotesByDefault(true); + text = extractor.getText(); + assertEquals( + "\n\n\n\n", text + ); } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index 0fc6f5e84..f24722700 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -41,7 +41,9 @@ public class PowerPointExtractor extends POITextExtractor private HSLFSlideShow _hslfshow; private SlideShow _show; private Slide[] _slides; - private Notes[] _notes; + + private boolean slidesByDefault = true; + private boolean notesByDefault = false; /** * Basic extractor. 
Returns all the text, and optionally all the notes @@ -99,7 +101,6 @@ public class PowerPointExtractor extends POITextExtractor _hslfshow = ss; _show = new SlideShow(_hslfshow); _slides = _show.getSlides(); - _notes = _show.getNotes(); } /** @@ -110,23 +111,39 @@ public class PowerPointExtractor extends POITextExtractor _hslfshow = null; _show = null; _slides = null; - _notes = null; } + /** + * Should a call to getText() return slide text? + * Default is yes + */ + public void setSlidesByDefault(boolean slidesByDefault) { + this.slidesByDefault = slidesByDefault; + } + /** + * Should a call to getText() return notes text? + * Default is no + */ + public void setNotesByDefault(boolean notesByDefault) { + this.notesByDefault = notesByDefault; + } - /** - * Fetches all the slide text from the slideshow, but not the notes - */ - public String getText() { - return getText(true,false); - } + /** + * Fetches all the slide text from the slideshow, + * but not the notes, unless you've called + * setSlidesByDefault() and setNotesByDefault() + * to change this + */ + public String getText() { + return getText(slidesByDefault,notesByDefault); + } - /** - * Fetches all the notes text from the slideshow, but not the slide text - */ - public String getNotes() { - return getText(false,true); - } + /** + * Fetches all the notes text from the slideshow, but not the slide text + */ + public String getNotes() { + return getText(false,true); + } /** * Fetches text from the slideshow, be it slide text or note text. 
@@ -154,7 +171,7 @@ public class PowerPointExtractor extends POITextExtractor } } if(getNoteText) { - ret.append(" "); + ret.append("\n"); } } diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java index e0e0318e7..f8618ff0b 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java @@ -72,6 +72,29 @@ public class TextExtractor extends TestCase { ensureTwoStringsTheSame(expectText, notesText); } + + public void testReadBoth() throws Exception { + String[] slText = new String[] { + "This is a test title\nThis is a test subtitle\nThis is on page 1\n", + "This is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n" + }; + String[] ntText = new String[] { + "These are the notes for page 1\n", + "These are the notes on page two, again lacking formatting\n" + }; + + ppe.setSlidesByDefault(true); + ppe.setNotesByDefault(false); + assertEquals(slText[0]+slText[1], ppe.getText()); + + ppe.setSlidesByDefault(false); + ppe.setNotesByDefault(true); + assertEquals(ntText[0]+ntText[1], ppe.getText()); + + ppe.setSlidesByDefault(true); + ppe.setNotesByDefault(true); + assertEquals(slText[0]+slText[1]+"\n"+ntText[0]+ntText[1], ppe.getText()); + } /** * Test that when presented with a PPT file missing the odd