Update the XSLF slide master text extraction to be optional, as HSLF already is, plus fix some indents

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1173756 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2011-09-21 16:54:22 +00:00
parent 59204cbc38
commit 766b2256cf
4 changed files with 73 additions and 23 deletions

View File

@ -34,7 +34,7 @@
<changes>
<release version="3.8-beta5" date="2011-??-??">
<action dev="poi-developers" type="add">51804 - include Master Slide text in XSLF text extraction</action>
<action dev="poi-developers" type="add">51804 - optionally include Master Slide text in XSLF text extraction, as HSLF already offers</action>
<action dev="poi-developers" type="add">New PackagePart method getRelatedPart(PackageRelationship) to simplify navigation of relations between OPC Parts</action>
<action dev="poi-developers" type="fix">51832 - handle XLS files where the WRITEPROTECT record precedes the FILEPASS one, rather than following as normal</action>
<action dev="poi-developers" type="fix">51809 - correct GTE handling in COUNTIF</action>

View File

@ -45,6 +45,7 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
private XMLSlideShow slideshow;
private boolean slidesByDefault = true;
private boolean notesByDefault = false;
private boolean masterByDefault = false;
public XSLFPowerPointExtractor(XMLSlideShow slideshow) {
super(slideshow);
@ -84,6 +85,13 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
this.notesByDefault = notesByDefault;
}
/**
* Should a call to getText() return text from master? Default is no
* <p>
* The stored flag is what {@link #getText(boolean, boolean)} passes on as
* the master-text argument when the caller does not specify one.
*
* @param masterByDefault true to include master text by default, false to leave it out
*/
public void setMasterByDefault(boolean masterByDefault) {
this.masterByDefault = masterByDefault;
}
/**
* Gets the slide text, but not the notes text
*/
@ -97,6 +105,16 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
* @param notesText Should we retrieve text from notes?
*/
public String getText(boolean slideText, boolean notesText) {
// No explicit master choice in this overload, so fall back to the
// configured masterByDefault setting
return getText(slideText, notesText, masterByDefault);
}
/**
* Gets the requested text from the file
* @param slideText Should we retrieve text from slides?
* @param notesText Should we retrieve text from notes?
* @param masterText Should we retrieve text from master slides?
*/
public String getText(boolean slideText, boolean notesText, boolean masterText) {
StringBuffer text = new StringBuffer();
XSLFSlide[] slides = slideshow.getSlides();
@ -115,8 +133,8 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
if (slideText) {
extractText(slide.getCommonSlideData(), text);
// If there's a master sheet, grab text from there
if(master != null) {
// If there's a master sheet and it's requested, grab text from there
if(masterText && master != null) {
extractText(master.getCommonSlideData(), text);
}

View File

@ -68,22 +68,18 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"Fifth level\n";
// Just slides, no notes
text = extractor.getText(true, false);
text = extractor.getText(true, false, false);
assertEquals(
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
masterText +
"\n\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n" +
masterText +
"\n\n\n"
"\n"
, text
);
@ -94,25 +90,61 @@ public class TestXSLFPowerPointExtractor extends TestCase {
);
// Both
text = extractor.getText(true, true);
text = extractor.getText(true, true, false);
assertEquals(
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
masterText +
"\n\n\n\n\n" +
"\n\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n" +
masterText +
"\n\n\n\n\n"
"\n\n\n"
, text
);
// With Slides and Master Text
text = extractor.getText(true, false, true);
assertEquals(
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
masterText +
"\n\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n" +
masterText +
"\n\n\n"
, text
);
// With Slides, Notes and Master Text
text = extractor.getText(true, true, true);
assertEquals(
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
masterText +
"\n\n\n\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n" +
masterText +
"\n\n\n\n\n"
, text
);
// Via set defaults
extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(true);

View File

@ -39,14 +39,14 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* @author Nick Burch
*/
public final class PowerPointExtractor extends POIOLE2TextExtractor {
private HSLFSlideShow _hslfshow;
private SlideShow _show;
private Slide[] _slides;
private HSLFSlideShow _hslfshow;
private SlideShow _show;
private Slide[] _slides;
private boolean _slidesByDefault = true;
private boolean _notesByDefault = false;
private boolean _commentsByDefault = false;
private boolean _masterByDefault = false;
private boolean _slidesByDefault = true;
private boolean _notesByDefault = false;
private boolean _commentsByDefault = false;
private boolean _masterByDefault = false;
/**
* Basic extractor. Returns all the text, and optionally all the notes