Make the OOXML and OLE2 PowerPoint extractors more consistent with the other text extractors, and with each other

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@608830 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-01-04 13:19:23 +00:00
parent f3177bf387
commit fabb3a53c0
4 changed files with 84 additions and 19 deletions

View File

@ -37,6 +37,8 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
public class HXFPowerPointExtractor extends POIXMLTextExtractor {
private HSLFXMLSlideShow slideshow;
private boolean slidesByDefault = true;
private boolean notesByDefault = false;
public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
this(new HSLFXMLSlideShow(
@ -60,12 +62,27 @@ public class HXFPowerPointExtractor extends POIXMLTextExtractor {
));
System.out.println(extractor.getText());
}
/**
* Sets whether a call to the no-argument getText() returns slide text.
* Default is yes (the slidesByDefault field starts out as true).
*
* @param slidesByDefault true to include slide text in getText(), false to leave it out
*/
public void setSlidesByDefault(boolean slidesByDefault) {
this.slidesByDefault = slidesByDefault;
}
/**
* Sets whether a call to the no-argument getText() returns notes text.
* Default is no (the notesByDefault field starts out as false).
*
* @param notesByDefault true to include notes text in getText(), false to leave it out
*/
public void setNotesByDefault(boolean notesByDefault) {
this.notesByDefault = notesByDefault;
}
/**
 * Gets the text selected by the current defaults: the slide text but
 * not the notes text, unless setSlidesByDefault() and
 * setNotesByDefault() have been called to change this.
 *
 * @return the text chosen by the slidesByDefault and notesByDefault flags
 */
public String getText() {
    // Delegate with the configurable flags; a hard-coded (true, true) here
    // would silently ignore the two setters above.
    return getText(slidesByDefault, notesByDefault);
}
/**

View File

@ -60,7 +60,7 @@ public class TestHXFPowerPointExtractor extends TestCase {
// Check Basics
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
assertTrue(text.endsWith("amet\n\n\n\n"));
assertTrue(text.endsWith("amet\n\n"));
// Just slides, no notes
text = extractor.getText(true, false);
@ -97,5 +97,13 @@ public class TestHXFPowerPointExtractor extends TestCase {
"amet\n" +
"\n\n\n", text
);
// Via set defaults
extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(true);
text = extractor.getText();
assertEquals(
"\n\n\n\n", text
);
}
}

View File

@ -41,7 +41,9 @@ public class PowerPointExtractor extends POITextExtractor
private HSLFSlideShow _hslfshow;
private SlideShow _show;
private Slide[] _slides;
private Notes[] _notes;
private boolean slidesByDefault = true;
private boolean notesByDefault = false;
/**
* Basic extractor. Returns all the text, and optionally all the notes
@ -99,7 +101,6 @@ public class PowerPointExtractor extends POITextExtractor
_hslfshow = ss;
_show = new SlideShow(_hslfshow);
_slides = _show.getSlides();
_notes = _show.getNotes();
}
/**
@ -110,23 +111,39 @@ public class PowerPointExtractor extends POITextExtractor
_hslfshow = null;
_show = null;
_slides = null;
_notes = null;
}
/**
* Sets whether a call to the no-argument getText() returns slide text.
* Default is yes (the slidesByDefault field starts out as true).
*
* @param slidesByDefault true to include slide text in getText(), false to leave it out
*/
public void setSlidesByDefault(boolean slidesByDefault) {
this.slidesByDefault = slidesByDefault;
}
/**
* Sets whether a call to the no-argument getText() returns notes text.
* Default is no (the notesByDefault field starts out as false).
*
* @param notesByDefault true to include notes text in getText(), false to leave it out
*/
public void setNotesByDefault(boolean notesByDefault) {
this.notesByDefault = notesByDefault;
}
/**
 * Fetches all the slide text from the slideshow,
 * but not the notes, unless you've called
 * setSlidesByDefault() and setNotesByDefault()
 * to change this.
 *
 * @return the text chosen by the slidesByDefault and notesByDefault flags
 */
public String getText() {
    // Single definition: the flags default to (true, false), so the
    // out-of-the-box result is slide text only.
    return getText(slidesByDefault, notesByDefault);
}
/**
 * Fetches all the notes text from the slideshow, but not the slide text.
 * This ignores the setSlidesByDefault() / setNotesByDefault() settings.
 *
 * @return the notes text, as produced by getText(false, true)
 */
public String getNotes() {
    return getText(false, true);
}
/**
* Fetches text from the slideshow, be it slide text or note text.
@ -154,7 +171,7 @@ public class PowerPointExtractor extends POITextExtractor
}
}
if(getNoteText) {
ret.append(" ");
ret.append("\n");
}
}

View File

@ -72,6 +72,29 @@ public class TextExtractor extends TestCase {
ensureTwoStringsTheSame(expectText, notesText);
}
/**
 * Checks the no-argument getText() under each combination of defaults:
 * slides only, notes only, and then slides followed by notes.
 */
public void testReadBoth() throws Exception {
    final String slideOne =
        "This is a test title\nThis is a test subtitle\nThis is on page 1\n";
    final String slideTwo =
        "This is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
    final String notesOne =
        "These are the notes for page 1\n";
    final String notesTwo =
        "These are the notes on page two, again lacking formatting\n";

    final String allSlides = slideOne + slideTwo;
    final String allNotes = notesOne + notesTwo;

    // Slides only
    ppe.setSlidesByDefault(true);
    ppe.setNotesByDefault(false);
    assertEquals(allSlides, ppe.getText());

    // Notes only
    ppe.setSlidesByDefault(false);
    ppe.setNotesByDefault(true);
    assertEquals(allNotes, ppe.getText());

    // Both: the slide text and the notes text are separated by one newline
    ppe.setSlidesByDefault(true);
    ppe.setNotesByDefault(true);
    assertEquals(allSlides + "\n" + allNotes, ppe.getText());
}
/**
* Test that when presented with a PPT file missing the odd