With help from Yegor, fix bug #45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@683020 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1d478f3af7
commit
17f779f950
@ -37,6 +37,7 @@
|
|||||||
|
|
||||||
<!-- Don't forget to update status.xml too! -->
|
<!-- Don't forget to update status.xml too! -->
|
||||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
|
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was a text next to it</action>
|
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was a text next to it</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
|
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
<!-- Don't forget to update changes.xml too! -->
|
<!-- Don't forget to update changes.xml too! -->
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45537 - Include headers and footers (of slides and notes) in the extracted text from HSLF</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
|
<action dev="POI-DEVELOPERS" type="fix">45472 - Fixed incorrect default row height in OpenOffice 2.3</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was a text next to it</action>
|
<action dev="POI-DEVELOPERS" type="fix">44692 - HSSFPicture.resize() stretched image when there was a text next to it</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
|
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
|
||||||
|
@ -176,11 +176,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
|||||||
for(int i=0; i<_slides.length; i++) {
|
for(int i=0; i<_slides.length; i++) {
|
||||||
Slide slide = _slides[i];
|
Slide slide = _slides[i];
|
||||||
|
|
||||||
|
// Slide header, if set
|
||||||
HeadersFooters hf = slide.getHeadersFooters();
|
HeadersFooters hf = slide.getHeadersFooters();
|
||||||
if(hf != null && hf.getHeaderText() != null) {
|
if(hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
|
||||||
ret.append(hf.getHeaderText() + "\n");
|
ret.append(hf.getHeaderText() + "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Slide text
|
||||||
TextRun[] runs = slide.getTextRuns();
|
TextRun[] runs = slide.getTextRuns();
|
||||||
for(int j=0; j<runs.length; j++) {
|
for(int j=0; j<runs.length; j++) {
|
||||||
TextRun run = runs[j];
|
TextRun run = runs[j];
|
||||||
@ -193,10 +195,12 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(hf != null && hf.getFooterText() != null) {
|
// Slide footer, if set
|
||||||
|
if(hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
|
||||||
ret.append(hf.getFooterText() + "\n");
|
ret.append(hf.getFooterText() + "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Comments, if requested and present
|
||||||
if(getCommentText) {
|
if(getCommentText) {
|
||||||
Comment[] comments = slide.getComments();
|
Comment[] comments = slide.getComments();
|
||||||
for(int j=0; j<comments.length; j++) {
|
for(int j=0; j<comments.length; j++) {
|
||||||
@ -219,6 +223,8 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
|||||||
// master sheets in. Grab Slide list, then work from there,
|
// master sheets in. Grab Slide list, then work from there,
|
||||||
// but ensure no duplicates
|
// but ensure no duplicates
|
||||||
HashSet seenNotes = new HashSet();
|
HashSet seenNotes = new HashSet();
|
||||||
|
HeadersFooters hf = _show.getNotesHeadersFooters();
|
||||||
|
|
||||||
for(int i=0; i<_slides.length; i++) {
|
for(int i=0; i<_slides.length; i++) {
|
||||||
Notes notes = _slides[i].getNotesSheet();
|
Notes notes = _slides[i].getNotesSheet();
|
||||||
if(notes == null) { continue; }
|
if(notes == null) { continue; }
|
||||||
@ -226,6 +232,12 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
|||||||
if(seenNotes.contains(id)) { continue; }
|
if(seenNotes.contains(id)) { continue; }
|
||||||
seenNotes.add(id);
|
seenNotes.add(id);
|
||||||
|
|
||||||
|
// Repeat the Notes header, if set
|
||||||
|
if(hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
|
||||||
|
ret.append(hf.getHeaderText() + "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Notes text
|
||||||
TextRun[] runs = notes.getTextRuns();
|
TextRun[] runs = notes.getTextRuns();
|
||||||
if(runs != null && runs.length > 0) {
|
if(runs != null && runs.length > 0) {
|
||||||
for(int j=0; j<runs.length; j++) {
|
for(int j=0; j<runs.length; j++) {
|
||||||
@ -237,6 +249,11 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Repeat the Notes footer, if set
|
||||||
|
if(hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
|
||||||
|
ret.append(hf.getFooterText() + "\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,6 +23,8 @@ import org.apache.poi.hslf.usermodel.SlideShow;
|
|||||||
/**
|
/**
|
||||||
* Header / Footer settings.
|
* Header / Footer settings.
|
||||||
*
|
*
|
||||||
|
* You can get these on slides, or across all notes
|
||||||
|
*
|
||||||
* @author Yegor Kozlov
|
* @author Yegor Kozlov
|
||||||
*/
|
*/
|
||||||
public class HeadersFooters {
|
public class HeadersFooters {
|
||||||
|
@ -253,32 +253,42 @@ public class TextExtractor extends TestCase {
|
|||||||
/**
|
/**
|
||||||
* From bug #45537
|
* From bug #45537
|
||||||
*/
|
*/
|
||||||
public void DISABLEDtestHeaderFooter() throws Exception {
|
public void testHeaderFooter() throws Exception {
|
||||||
String filename, text;
|
String filename, text;
|
||||||
|
|
||||||
// With a header
|
// With a header on the notes
|
||||||
filename = dirname + "/45537_Header.ppt";
|
filename = dirname + "/45537_Header.ppt";
|
||||||
HSLFSlideShow hslf = new HSLFSlideShow(new FileInputStream(filename));
|
HSLFSlideShow hslf = new HSLFSlideShow(new FileInputStream(filename));
|
||||||
SlideShow ss = new SlideShow(hslf);
|
SlideShow ss = new SlideShow(hslf);
|
||||||
assertNotNull(ss.getSlides()[0].getHeadersFooters());
|
assertNotNull(ss.getNotesHeadersFooters());
|
||||||
assertEquals("testdoc test phrase", ss.getSlides()[0].getHeadersFooters().getHeaderText());
|
assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getHeaderText());
|
||||||
|
|
||||||
ppe = new PowerPointExtractor(hslf);
|
ppe = new PowerPointExtractor(hslf);
|
||||||
|
|
||||||
|
text = ppe.getText();
|
||||||
|
assertFalse("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||||
|
assertFalse("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
||||||
|
|
||||||
|
ppe.setNotesByDefault(true);
|
||||||
text = ppe.getText();
|
text = ppe.getText();
|
||||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
||||||
|
|
||||||
|
|
||||||
// And with a footer
|
// And with a footer, also on notes
|
||||||
filename = dirname + "/45537_Footer.ppt";
|
filename = dirname + "/45537_Footer.ppt";
|
||||||
hslf = new HSLFSlideShow(new FileInputStream(filename));
|
hslf = new HSLFSlideShow(new FileInputStream(filename));
|
||||||
ss = new SlideShow(hslf);
|
ss = new SlideShow(hslf);
|
||||||
assertNotNull(ss.getSlides()[0].getHeadersFooters());
|
assertNotNull(ss.getNotesHeadersFooters());
|
||||||
assertEquals("testdoc test phrase", ss.getSlides()[0].getHeadersFooters().getFooterText());
|
assertEquals("testdoc test phrase", ss.getNotesHeadersFooters().getFooterText());
|
||||||
|
|
||||||
ppe = new PowerPointExtractor(filename);
|
ppe = new PowerPointExtractor(filename);
|
||||||
|
|
||||||
|
text = ppe.getText();
|
||||||
|
assertFalse("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||||
|
assertFalse("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
||||||
|
|
||||||
|
ppe.setNotesByDefault(true);
|
||||||
text = ppe.getText();
|
text = ppe.getText();
|
||||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
||||||
|
Loading…
Reference in New Issue
Block a user