Make the OOXML and OLE2 PowerPoint extractors more in keeping with the others, and with each other
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@608830 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f3177bf387
commit
fabb3a53c0
@ -37,6 +37,8 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
|
||||
|
||||
public class HXFPowerPointExtractor extends POIXMLTextExtractor {
|
||||
private HSLFXMLSlideShow slideshow;
|
||||
private boolean slidesByDefault = true;
|
||||
private boolean notesByDefault = false;
|
||||
|
||||
public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
||||
this(new HSLFXMLSlideShow(
|
||||
@ -60,12 +62,27 @@ public class HXFPowerPointExtractor extends POIXMLTextExtractor {
|
||||
));
|
||||
System.out.println(extractor.getText());
|
||||
}
|
||||
|
||||
/**
|
||||
* Should a call to getText() return slide text?
|
||||
* Default is yes
|
||||
*/
|
||||
public void setSlidesByDefault(boolean slidesByDefault) {
|
||||
this.slidesByDefault = slidesByDefault;
|
||||
}
|
||||
/**
|
||||
* Should a call to getText() return notes text?
|
||||
* Default is no
|
||||
*/
|
||||
public void setNotesByDefault(boolean notesByDefault) {
|
||||
this.notesByDefault = notesByDefault;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the slide and notes text
|
||||
* Gets the slide text, but not the notes text
|
||||
*/
|
||||
public String getText() {
|
||||
return getText(true, true);
|
||||
return getText(slidesByDefault, notesByDefault);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -60,7 +60,7 @@ public class TestHXFPowerPointExtractor extends TestCase {
|
||||
|
||||
// Check Basics
|
||||
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
|
||||
assertTrue(text.endsWith("amet\n\n\n\n"));
|
||||
assertTrue(text.endsWith("amet\n\n"));
|
||||
|
||||
// Just slides, no notes
|
||||
text = extractor.getText(true, false);
|
||||
@ -97,5 +97,13 @@ public class TestHXFPowerPointExtractor extends TestCase {
|
||||
"amet\n" +
|
||||
"\n\n\n", text
|
||||
);
|
||||
|
||||
// Via set defaults
|
||||
extractor.setSlidesByDefault(false);
|
||||
extractor.setNotesByDefault(true);
|
||||
text = extractor.getText();
|
||||
assertEquals(
|
||||
"\n\n\n\n", text
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -41,7 +41,9 @@ public class PowerPointExtractor extends POITextExtractor
|
||||
private HSLFSlideShow _hslfshow;
|
||||
private SlideShow _show;
|
||||
private Slide[] _slides;
|
||||
private Notes[] _notes;
|
||||
|
||||
private boolean slidesByDefault = true;
|
||||
private boolean notesByDefault = false;
|
||||
|
||||
/**
|
||||
* Basic extractor. Returns all the text, and optionally all the notes
|
||||
@ -99,7 +101,6 @@ public class PowerPointExtractor extends POITextExtractor
|
||||
_hslfshow = ss;
|
||||
_show = new SlideShow(_hslfshow);
|
||||
_slides = _show.getSlides();
|
||||
_notes = _show.getNotes();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -110,23 +111,39 @@ public class PowerPointExtractor extends POITextExtractor
|
||||
_hslfshow = null;
|
||||
_show = null;
|
||||
_slides = null;
|
||||
_notes = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should a call to getText() return slide text?
|
||||
* Default is yes
|
||||
*/
|
||||
public void setSlidesByDefault(boolean slidesByDefault) {
|
||||
this.slidesByDefault = slidesByDefault;
|
||||
}
|
||||
/**
|
||||
* Should a call to getText() return notes text?
|
||||
* Default is no
|
||||
*/
|
||||
public void setNotesByDefault(boolean notesByDefault) {
|
||||
this.notesByDefault = notesByDefault;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches all the slide text from the slideshow, but not the notes
|
||||
*/
|
||||
public String getText() {
|
||||
return getText(true,false);
|
||||
}
|
||||
/**
|
||||
* Fetches all the slide text from the slideshow,
|
||||
* but not the notes, unless you've called
|
||||
* setSlidesByDefault() and setNotesByDefault()
|
||||
* to change this
|
||||
*/
|
||||
public String getText() {
|
||||
return getText(slidesByDefault,notesByDefault);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches all the notes text from the slideshow, but not the slide text
|
||||
*/
|
||||
public String getNotes() {
|
||||
return getText(false,true);
|
||||
}
|
||||
/**
|
||||
* Fetches all the notes text from the slideshow, but not the slide text
|
||||
*/
|
||||
public String getNotes() {
|
||||
return getText(false,true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches text from the slideshow, be it slide text or note text.
|
||||
@ -154,7 +171,7 @@ public class PowerPointExtractor extends POITextExtractor
|
||||
}
|
||||
}
|
||||
if(getNoteText) {
|
||||
ret.append(" ");
|
||||
ret.append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -72,6 +72,29 @@ public class TextExtractor extends TestCase {
|
||||
|
||||
ensureTwoStringsTheSame(expectText, notesText);
|
||||
}
|
||||
|
||||
public void testReadBoth() throws Exception {
|
||||
String[] slText = new String[] {
|
||||
"This is a test title\nThis is a test subtitle\nThis is on page 1\n",
|
||||
"This is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n"
|
||||
};
|
||||
String[] ntText = new String[] {
|
||||
"These are the notes for page 1\n",
|
||||
"These are the notes on page two, again lacking formatting\n"
|
||||
};
|
||||
|
||||
ppe.setSlidesByDefault(true);
|
||||
ppe.setNotesByDefault(false);
|
||||
assertEquals(slText[0]+slText[1], ppe.getText());
|
||||
|
||||
ppe.setSlidesByDefault(false);
|
||||
ppe.setNotesByDefault(true);
|
||||
assertEquals(ntText[0]+ntText[1], ppe.getText());
|
||||
|
||||
ppe.setSlidesByDefault(true);
|
||||
ppe.setNotesByDefault(true);
|
||||
assertEquals(slText[0]+slText[1]+"\n"+ntText[0]+ntText[1], ppe.getText());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that when presented with a PPT file missing the odd
|
||||
|
Loading…
Reference in New Issue
Block a user