diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index ee49e67d7..ace9d1164 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 51803 - fixed HSLF TextExtractor to extract content from master slide 52190 - null check on XWPF setFontFamily 52062 - ensure that temporary files in SXSSF are deleted 50936 - Exception parsing MS Word 8.0 file (as duplicate of 47958) diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index 8a195ac25..6610cde2c 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -221,7 +221,22 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor { if (getSlideText) { if (getMasterText) { for (SlideMaster master : _show.getSlidesMasters()) { - textRunsToText(ret, master.getTextRuns()); + for(Shape sh : master.getShapes()){ + if(sh instanceof TextShape){ + if(MasterSheet.isPlaceholder(sh)) { + // don't bother about boiler + // plate text on master + // sheets + continue; + } + TextShape tsh = (TextShape)sh; + String text = tsh.getText(); + ret.append(text); + if (!text.endsWith("\n")) { + ret.append("\n"); + } + } + } } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java index e52466539..7850d774b 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java +++ b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java @@ -24,7 +24,6 @@ import org.apache.poi.hslf.usermodel.SlideShow; import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.util.Vector; import java.awt.*; /** @@ -122,7 +121,7 @@ public abstract class Sheet { * For a given PPDrawing, grab all the TextRuns */ 
public static TextRun[] findTextRuns(PPDrawing ppdrawing) { - Vector runsV = new Vector(); + final List<TextRun> runsV = new ArrayList<TextRun>(); EscherTextboxWrapper[] wrappers = ppdrawing.getTextboxWrappers(); for (int i = 0; i < wrappers.length; i++) { int s1 = runsV.size(); @@ -132,15 +131,11 @@ public abstract class Sheet { findTextRuns(wrappers[i].getChildRecords(), runsV); int s2 = runsV.size(); if (s2 != s1){ - TextRun t = (TextRun) runsV.get(runsV.size()-1); + TextRun t = runsV.get(runsV.size()-1); t.setShapeId(wrappers[i].getShapeId()); } } - TextRun[] runs = new TextRun[runsV.size()]; - for (int i = 0; i < runs.length; i++) { - runs[i] = (TextRun) runsV.get(i); - } - return runs; + return runsV.toArray(new TextRun[runsV.size()]); } /** @@ -151,7 +146,7 @@ public abstract class Sheet { * @param records the records to build from * @param found vector to add any found to */ - protected static void findTextRuns(Record[] records, Vector found) { + protected static void findTextRuns(Record[] records, List<TextRun> found) { // Look for a TextHeaderAtom for (int i = 0, slwtIndex=0; i < (records.length - 1); i++) { if (records[i] instanceof TextHeaderAtom) { diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java index c457c9c8c..56eb0a0d9 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java @@ -292,13 +292,15 @@ public final class TestExtractor extends TestCase { // Initially not there String text = ppe.getText(); - assertFalse(text.contains("Master Header Text")); + assertFalse(text.contains("Text that I added to the master slide")); // Enable, shows up ppe.setMasterByDefault(true); text = ppe.getText(); - assertTrue(text.contains("Master Header Text")); - + assertTrue(text.contains("Text that I added to the master slide")); + + // Make sure placeholder text does not 
come out + assertFalse(text.contains("Click to edit Master")); // Now with another file only containing master text // Will always show up diff --git a/test-data/slideshow/master_text.ppt b/test-data/slideshow/master_text.ppt index a748e8b21..cdcf4bc00 100644 Binary files a/test-data/slideshow/master_text.ppt and b/test-data/slideshow/master_text.ppt differ