support for text extraction from PPT master slides, see Bugzilla 48161

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@835271 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2009-11-12 07:07:41 +00:00
parent 4a57314dcc
commit e8e2b1ff49
4 changed files with 53 additions and 31 deletions

View File

@ -34,6 +34,7 @@
<changes> <changes>
<release version="3.6-beta1" date="2009-??-??"> <release version="3.6-beta1" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
<action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action> <action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>
<action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action> <action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action>
<action dev="POI-DEVELOPERS" type="fix">48087 - avoid NPE in XSSFChartSheet when calling methods of the superclass</action> <action dev="POI-DEVELOPERS" type="fix">48087 - avoid NPE in XSSFChartSheet when calling methods of the superclass</action>

View File

@ -45,6 +45,7 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
private boolean _slidesByDefault = true; private boolean _slidesByDefault = true;
private boolean _notesByDefault = false; private boolean _notesByDefault = false;
private boolean _commentsByDefault = false; private boolean _commentsByDefault = false;
private boolean _masterByDefault = false;
/** /**
* Basic extractor. Returns all the text, and optionally all the notes * Basic extractor. Returns all the text, and optionally all the notes
@ -58,6 +59,8 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
boolean notes = false; boolean notes = false;
boolean comments = false; boolean comments = false;
boolean master = true;
String file; String file;
if (args.length > 1) { if (args.length > 1) {
notes = true; notes = true;
@ -70,7 +73,7 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
} }
PowerPointExtractor ppe = new PowerPointExtractor(file); PowerPointExtractor ppe = new PowerPointExtractor(file);
System.out.println(ppe.getText(true, notes, comments)); System.out.println(ppe.getText(true, notes, comments, master));
} }
/** /**
@ -137,12 +140,19 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
this._commentsByDefault = commentsByDefault; this._commentsByDefault = commentsByDefault;
} }
/**
 * Controls whether the no-argument {@link #getText()} also includes
 * text taken from the master slides. This is switched off unless it
 * has been enabled through this method.
 *
 * @param masterByDefault true to have getText() include master text
 */
public void setMasterByDefault(boolean masterByDefault) {
    _masterByDefault = masterByDefault;
}
/** /**
* Fetches all the slide text from the slideshow, but not the notes, unless * Fetches all the slide text from the slideshow, but not the notes, unless
* you've called setSlidesByDefault() and setNotesByDefault() to change this * you've called setSlidesByDefault() and setNotesByDefault() to change this
*/ */
public String getText() { public String getText() {
return getText(_slidesByDefault, _notesByDefault, _commentsByDefault); return getText(_slidesByDefault, _notesByDefault, _commentsByDefault, _masterByDefault);
} }
/** /**
@ -178,14 +188,20 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
* @param getNoteText fetch note text * @param getNoteText fetch note text
*/ */
public String getText(boolean getSlideText, boolean getNoteText) { public String getText(boolean getSlideText, boolean getNoteText) {
return getText(getSlideText, getNoteText, _commentsByDefault); return getText(getSlideText, getNoteText, _commentsByDefault, _masterByDefault);
} }
public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText) { public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText, boolean getMasterText) {
StringBuffer ret = new StringBuffer(); StringBuffer ret = new StringBuffer();
if (getSlideText) { if (getSlideText) {
for (int i = 0; i < _slides.length; i++) { if (getMasterText) {
for (SlideMaster master : _show.getSlidesMasters()) {
textRunsToText(ret, master.getTextRuns());
}
}
for (int i = 0; i < _slides.length; i++) {
Slide slide = _slides[i]; Slide slide = _slides[i];
// Slide header, if set // Slide header, if set
@ -195,19 +211,9 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
} }
// Slide text // Slide text
TextRun[] runs = slide.getTextRuns(); textRunsToText(ret, slide.getTextRuns());
for (int j = 0; j < runs.length; j++) {
TextRun run = runs[j];
if (run != null) {
String text = run.getText();
ret.append(text);
if (!text.endsWith("\n")) {
ret.append("\n");
}
}
}
// Slide footer, if set // Slide footer, if set
if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) { if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
ret.append(hf.getFooterText() + "\n"); ret.append(hf.getFooterText() + "\n");
} }
@ -249,17 +255,7 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
} }
// Notes text // Notes text
TextRun[] runs = notes.getTextRuns(); textRunsToText(ret, notes.getTextRuns());
if (runs != null && runs.length > 0) {
for (int j = 0; j < runs.length; j++) {
TextRun run = runs[j];
String text = run.getText();
ret.append(text);
if (!text.endsWith("\n")) {
ret.append("\n");
}
}
}
// Repeat the notes footer, if set // Repeat the notes footer, if set
if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) { if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
@ -270,4 +266,21 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
return ret.toString(); return ret.toString();
} }
/**
 * Appends the text of every non-null run to the supplied buffer,
 * making sure the text of each run ends with a newline.
 *
 * @param ret  buffer that collects the extracted text
 * @param runs runs to append; when null nothing is added
 */
private void textRunsToText(StringBuffer ret, TextRun[] runs) {
    if (runs != null) {
        for (TextRun textRun : runs) {
            // Null entries are skipped rather than treated as an error
            if (textRun == null) {
                continue;
            }
            String content = textRun.getText();
            ret.append(content);
            // Only add a line break when the run does not already end with one
            if (!content.endsWith("\n")) {
                ret.append("\n");
            }
        }
    }
}
} }

View File

@ -48,7 +48,6 @@ public final class TestExtractor extends TestCase {
//private String pdirname; //private String pdirname;
protected void setUp() throws Exception { protected void setUp() throws Exception {
ppe = new PowerPointExtractor(slTests.openResourceAsStream("basic_test_ppt_file.ppt")); ppe = new PowerPointExtractor(slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
ppe2 = new PowerPointExtractor(slTests.openResourceAsStream("with_textbox.ppt")); ppe2 = new PowerPointExtractor(slTests.openResourceAsStream("with_textbox.ppt"));
} }
@ -265,4 +264,13 @@ public final class TestExtractor extends TestCase {
private static boolean contains(String text, String searchString) { private static boolean contains(String text, String searchString) {
return text.indexOf(searchString) >=0; return text.indexOf(searchString) >=0;
} }
/**
 * Checks that "Master Header Text" shows up in the text extracted from
 * master_text.ppt once master extraction has been switched on.
 */
public void testMasterText() throws Exception {
    ppe = new PowerPointExtractor(slTests.openResourceAsStream("master_text.ppt"));
    ppe.setMasterByDefault(true);

    assertTrue(contains(ppe.getText(), "Master Header Text"));
}
} }

Binary file not shown.