Make the OOXML and OLE2 PowerPoint extractors more in keeping with the others, and with each other
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@608830 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f3177bf387
commit
fabb3a53c0
@ -37,6 +37,8 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
|
|||||||
|
|
||||||
public class HXFPowerPointExtractor extends POIXMLTextExtractor {
|
public class HXFPowerPointExtractor extends POIXMLTextExtractor {
|
||||||
private HSLFXMLSlideShow slideshow;
|
private HSLFXMLSlideShow slideshow;
|
||||||
|
private boolean slidesByDefault = true;
|
||||||
|
private boolean notesByDefault = false;
|
||||||
|
|
||||||
public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(new HSLFXMLSlideShow(
|
this(new HSLFXMLSlideShow(
|
||||||
@ -62,10 +64,25 @@ public class HXFPowerPointExtractor extends POIXMLTextExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the slide and notes text
|
* Should a call to getText() return slide text?
|
||||||
|
* Default is yes
|
||||||
|
*/
|
||||||
|
public void setSlidesByDefault(boolean slidesByDefault) {
|
||||||
|
this.slidesByDefault = slidesByDefault;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Should a call to getText() return notes text?
|
||||||
|
* Default is no
|
||||||
|
*/
|
||||||
|
public void setNotesByDefault(boolean notesByDefault) {
|
||||||
|
this.notesByDefault = notesByDefault;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the slide text, but not the notes text, unless setSlidesByDefault() or setNotesByDefault() has been called to change this
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
return getText(true, true);
|
return getText(slidesByDefault, notesByDefault);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -60,7 +60,7 @@ public class TestHXFPowerPointExtractor extends TestCase {
|
|||||||
|
|
||||||
// Check Basics
|
// Check Basics
|
||||||
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
|
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
|
||||||
assertTrue(text.endsWith("amet\n\n\n\n"));
|
assertTrue(text.endsWith("amet\n\n"));
|
||||||
|
|
||||||
// Just slides, no notes
|
// Just slides, no notes
|
||||||
text = extractor.getText(true, false);
|
text = extractor.getText(true, false);
|
||||||
@ -97,5 +97,13 @@ public class TestHXFPowerPointExtractor extends TestCase {
|
|||||||
"amet\n" +
|
"amet\n" +
|
||||||
"\n\n\n", text
|
"\n\n\n", text
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Via set defaults
|
||||||
|
extractor.setSlidesByDefault(false);
|
||||||
|
extractor.setNotesByDefault(true);
|
||||||
|
text = extractor.getText();
|
||||||
|
assertEquals(
|
||||||
|
"\n\n\n\n", text
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,9 @@ public class PowerPointExtractor extends POITextExtractor
|
|||||||
private HSLFSlideShow _hslfshow;
|
private HSLFSlideShow _hslfshow;
|
||||||
private SlideShow _show;
|
private SlideShow _show;
|
||||||
private Slide[] _slides;
|
private Slide[] _slides;
|
||||||
private Notes[] _notes;
|
|
||||||
|
private boolean slidesByDefault = true;
|
||||||
|
private boolean notesByDefault = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic extractor. Returns all the text, and optionally all the notes
|
* Basic extractor. Returns all the text, and optionally all the notes
|
||||||
@ -99,7 +101,6 @@ public class PowerPointExtractor extends POITextExtractor
|
|||||||
_hslfshow = ss;
|
_hslfshow = ss;
|
||||||
_show = new SlideShow(_hslfshow);
|
_show = new SlideShow(_hslfshow);
|
||||||
_slides = _show.getSlides();
|
_slides = _show.getSlides();
|
||||||
_notes = _show.getNotes();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -110,15 +111,31 @@ public class PowerPointExtractor extends POITextExtractor
|
|||||||
_hslfshow = null;
|
_hslfshow = null;
|
||||||
_show = null;
|
_show = null;
|
||||||
_slides = null;
|
_slides = null;
|
||||||
_notes = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should a call to getText() return slide text?
|
||||||
|
* Default is yes
|
||||||
|
*/
|
||||||
|
public void setSlidesByDefault(boolean slidesByDefault) {
|
||||||
|
this.slidesByDefault = slidesByDefault;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Should a call to getText() return notes text?
|
||||||
|
* Default is no
|
||||||
|
*/
|
||||||
|
public void setNotesByDefault(boolean notesByDefault) {
|
||||||
|
this.notesByDefault = notesByDefault;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetches all the slide text from the slideshow, but not the notes
|
* Fetches all the slide text from the slideshow,
|
||||||
|
* but not the notes, unless you've called
|
||||||
|
* setSlidesByDefault() and setNotesByDefault()
|
||||||
|
* to change this
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
return getText(true,false);
|
return getText(slidesByDefault,notesByDefault);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -154,7 +171,7 @@ public class PowerPointExtractor extends POITextExtractor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(getNoteText) {
|
if(getNoteText) {
|
||||||
ret.append(" ");
|
ret.append("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,6 +73,29 @@ public class TextExtractor extends TestCase {
|
|||||||
ensureTwoStringsTheSame(expectText, notesText);
|
ensureTwoStringsTheSame(expectText, notesText);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testReadBoth() throws Exception {
|
||||||
|
String[] slText = new String[] {
|
||||||
|
"This is a test title\nThis is a test subtitle\nThis is on page 1\n",
|
||||||
|
"This is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n"
|
||||||
|
};
|
||||||
|
String[] ntText = new String[] {
|
||||||
|
"These are the notes for page 1\n",
|
||||||
|
"These are the notes on page two, again lacking formatting\n"
|
||||||
|
};
|
||||||
|
|
||||||
|
ppe.setSlidesByDefault(true);
|
||||||
|
ppe.setNotesByDefault(false);
|
||||||
|
assertEquals(slText[0]+slText[1], ppe.getText());
|
||||||
|
|
||||||
|
ppe.setSlidesByDefault(false);
|
||||||
|
ppe.setNotesByDefault(true);
|
||||||
|
assertEquals(ntText[0]+ntText[1], ppe.getText());
|
||||||
|
|
||||||
|
ppe.setSlidesByDefault(true);
|
||||||
|
ppe.setNotesByDefault(true);
|
||||||
|
assertEquals(slText[0]+slText[1]+"\n"+ntText[0]+ntText[1], ppe.getText());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that when presented with a PPT file missing the odd
|
* Test that when presented with a PPT file missing the odd
|
||||||
* core record, we can still get the rest of the text out
|
* core record, we can still get the rest of the text out
|
||||||
|
Loading…
Reference in New Issue
Block a user