With help from Yegor, fix bug #45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@683020 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-08-05 22:49:24 +00:00
parent 1d478f3af7
commit 17f779f950
5 changed files with 40 additions and 9 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF</action>
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was text next to it</action>
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF</action>
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was text next to it</action>
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>

View File

@ -176,11 +176,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
for(int i=0; i<_slides.length; i++) {
Slide slide = _slides[i];
// Slide header, if set
HeadersFooters hf = slide.getHeadersFooters();
if(hf != null && hf.getHeaderText() != null) {
if(hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
ret.append(hf.getHeaderText() + "\n");
}
// Slide text
TextRun[] runs = slide.getTextRuns();
for(int j=0; j<runs.length; j++) {
TextRun run = runs[j];
@ -193,10 +195,12 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
}
}
if(hf != null && hf.getFooterText() != null) {
// Slide footer, if set
if(hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
ret.append(hf.getFooterText() + "\n");
}
// Comments, if requested and present
if(getCommentText) {
Comment[] comments = slide.getComments();
for(int j=0; j<comments.length; j++) {
@ -219,6 +223,8 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
// master sheets in. Grab Slide list, then work from there,
// but ensure no duplicates
HashSet seenNotes = new HashSet();
HeadersFooters hf = _show.getNotesHeadersFooters();
for(int i=0; i<_slides.length; i++) {
Notes notes = _slides[i].getNotesSheet();
if(notes == null) { continue; }
@ -226,6 +232,12 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
if(seenNotes.contains(id)) { continue; }
seenNotes.add(id);
// Repeat the Notes header, if set
if(hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
ret.append(hf.getHeaderText() + "\n");
}
// Notes text
TextRun[] runs = notes.getTextRuns();
if(runs != null && runs.length > 0) {
for(int j=0; j<runs.length; j++) {
@ -237,6 +249,11 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
}
}
}
// Repeat the notes footer, if set
if(hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
ret.append(hf.getFooterText() + "\n");
}
}
}

View File

@ -23,6 +23,8 @@ import org.apache.poi.hslf.usermodel.SlideShow;
/**
* Header / Footer settings.
*
* You can get these on slides, or across all notes
*
* @author Yegor Kozlov
*/
public class HeadersFooters {

View File

@ -253,32 +253,42 @@ public class TextExtractor extends TestCase {
/**
* From bug #45537
*/
public void DISABLEDtestHeaderFooter() throws Exception {
public void testHeaderFooter() throws Exception {
String filename, text;
// With a header
// With a header on the notes
filename = dirname + "/45537_Header.ppt";
HSLFSlideShow hslf = new HSLFSlideShow(new FileInputStream(filename));
SlideShow ss = new SlideShow(hslf);
assertNotNull(ss.getSlides()[0].getHeadersFooters());
assertEquals("testdoc test phrase", ss.getSlides()[0].getHeadersFooters().getHeaderText());
assertNotNull(ss.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getHeaderText());
ppe = new PowerPointExtractor(hslf);
text = ppe.getText();
assertFalse("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertFalse("Unable to find expected word in text\n" + text, text.contains("test phrase"));
ppe.setNotesByDefault(true);
text = ppe.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
// And with a footer
// And with a footer, also on notes
filename = dirname + "/45537_Footer.ppt";
hslf = new HSLFSlideShow(new FileInputStream(filename));
ss = new SlideShow(hslf);
assertNotNull(ss.getSlides()[0].getHeadersFooters());
assertEquals("testdoc test phrase", ss.getSlides()[0].getHeadersFooters().getFooterText());
assertNotNull(ss.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getFooterText());
ppe = new PowerPointExtractor(filename);
text = ppe.getText();
assertFalse("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertFalse("Unable to find expected word in text\n" + text, text.contains("test phrase"));
ppe.setNotesByDefault(true);
text = ppe.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));