With help from Yegor, fix bug #45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@683020 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-08-05 22:49:24 +00:00
parent 1d478f3af7
commit 17f779f950
5 changed files with 40 additions and 9 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??"> <release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF</action>
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action> <action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was a text next to it</action> <action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was a text next to it</action>
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action> <action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.1.1-alpha1" date="2008-??-??"> <release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF</action>
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action> <action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was a text next to it</action> <action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was a text next to it</action>
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action> <action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>

View File

@ -176,11 +176,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
for(int i=0; i<_slides.length; i++) { for(int i=0; i<_slides.length; i++) {
Slide slide = _slides[i]; Slide slide = _slides[i];
// Slide header, if set
HeadersFooters hf = slide.getHeadersFooters(); HeadersFooters hf = slide.getHeadersFooters();
if(hf != null && hf.getHeaderText() != null) { if(hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
ret.append(hf.getHeaderText() + "\n"); ret.append(hf.getHeaderText() + "\n");
} }
// Slide text
TextRun[] runs = slide.getTextRuns(); TextRun[] runs = slide.getTextRuns();
for(int j=0; j<runs.length; j++) { for(int j=0; j<runs.length; j++) {
TextRun run = runs[j]; TextRun run = runs[j];
@ -193,10 +195,12 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
} }
} }
if(hf != null && hf.getFooterText() != null) { // Slide footer, if set
if(hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
ret.append(hf.getFooterText() + "\n"); ret.append(hf.getFooterText() + "\n");
} }
// Comments, if requested and present
if(getCommentText) { if(getCommentText) {
Comment[] comments = slide.getComments(); Comment[] comments = slide.getComments();
for(int j=0; j<comments.length; j++) { for(int j=0; j<comments.length; j++) {
@ -219,6 +223,8 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
// master sheets in. Grab Slide list, then work from there, // master sheets in. Grab Slide list, then work from there,
// but ensure no duplicates // but ensure no duplicates
HashSet seenNotes = new HashSet(); HashSet seenNotes = new HashSet();
HeadersFooters hf = _show.getNotesHeadersFooters();
for(int i=0; i<_slides.length; i++) { for(int i=0; i<_slides.length; i++) {
Notes notes = _slides[i].getNotesSheet(); Notes notes = _slides[i].getNotesSheet();
if(notes == null) { continue; } if(notes == null) { continue; }
@ -226,6 +232,12 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
if(seenNotes.contains(id)) { continue; } if(seenNotes.contains(id)) { continue; }
seenNotes.add(id); seenNotes.add(id);
// Repeat the Notes header, if set
if(hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
ret.append(hf.getHeaderText() + "\n");
}
// Notes text
TextRun[] runs = notes.getTextRuns(); TextRun[] runs = notes.getTextRuns();
if(runs != null && runs.length > 0) { if(runs != null && runs.length > 0) {
for(int j=0; j<runs.length; j++) { for(int j=0; j<runs.length; j++) {
@ -237,6 +249,11 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
} }
} }
} }
// Repeat the Notes footer, if set
if(hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
ret.append(hf.getFooterText() + "\n");
}
} }
} }

View File

@ -23,6 +23,8 @@ import org.apache.poi.hslf.usermodel.SlideShow;
/** /**
* Header / Footer settings. * Header / Footer settings.
* *
* Available per slide (see Slide.getHeadersFooters()), or once for all notes in a show (see SlideShow.getNotesHeadersFooters())
*
* @author Yegor Kozlov * @author Yegor Kozlov
*/ */
public class HeadersFooters { public class HeadersFooters {

View File

@ -253,32 +253,42 @@ public class TextExtractor extends TestCase {
/** /**
* From bug #45537 * From bug #45537
*/ */
public void DISABLEDtestHeaderFooter() throws Exception { public void testHeaderFooter() throws Exception {
String filename, text; String filename, text;
// With a header // With a header on the notes
filename = dirname + "/45537_Header.ppt"; filename = dirname + "/45537_Header.ppt";
HSLFSlideShow hslf = new HSLFSlideShow(new FileInputStream(filename)); HSLFSlideShow hslf = new HSLFSlideShow(new FileInputStream(filename));
SlideShow ss = new SlideShow(hslf); SlideShow ss = new SlideShow(hslf);
assertNotNull(ss.getSlides()[0].getHeadersFooters()); assertNotNull(ss.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ss.getSlides()[0].getHeadersFooters().getHeaderText()); assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getHeaderText());
ppe = new PowerPointExtractor(hslf); ppe = new PowerPointExtractor(hslf);
text = ppe.getText();
assertFalse("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertFalse("Unable to find expected word in text\n" + text, text.contains("test phrase"));
ppe.setNotesByDefault(true);
text = ppe.getText(); text = ppe.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
// And with a footer // And with a footer, also on notes
filename = dirname + "/45537_Footer.ppt"; filename = dirname + "/45537_Footer.ppt";
hslf = new HSLFSlideShow(new FileInputStream(filename)); hslf = new HSLFSlideShow(new FileInputStream(filename));
ss = new SlideShow(hslf); ss = new SlideShow(hslf);
assertNotNull(ss.getSlides()[0].getHeadersFooters()); assertNotNull(ss.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ss.getSlides()[0].getHeadersFooters().getFooterText()); assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getFooterText());
ppe = new PowerPointExtractor(filename); ppe = new PowerPointExtractor(filename);
text = ppe.getText();
assertFalse("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertFalse("Unable to find expected word in text\n" + text, text.contains("test phrase"));
ppe.setNotesByDefault(true);
text = ppe.getText(); text = ppe.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));