Fix bug #55733 - XWPFWordExtractor needs to handle .docx files with neither headers nor footers
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1538044 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
01b276b2f3
commit
a6b301e485
@ -17,7 +17,6 @@
|
|||||||
package org.apache.poi.xwpf.extractor;
|
package org.apache.poi.xwpf.extractor;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
@ -34,7 +33,6 @@ import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
|
|||||||
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
|
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFRun;
|
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFSDT;
|
import org.apache.poi.xwpf.usermodel.XWPFSDT;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
|
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
|
||||||
@ -85,24 +83,24 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
System.out.println(extractor.getText());
|
System.out.println(extractor.getText());
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getText() {
|
public String getText() {
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
|
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
|
||||||
|
|
||||||
// Start out with all headers
|
// Start out with all headers
|
||||||
extractHeaders(text, hfPolicy);
|
extractHeaders(text, hfPolicy);
|
||||||
|
|
||||||
// body elements
|
// Process all body elements
|
||||||
for (IBodyElement e : document.getBodyElements()){
|
for (IBodyElement e : document.getBodyElements()){
|
||||||
appendBodyElementText(text, e);
|
appendBodyElementText(text, e);
|
||||||
text.append('\n');
|
text.append('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finish up with all the footers
|
// Finish up with all the footers
|
||||||
extractFooters(text, hfPolicy);
|
extractFooters(text, hfPolicy);
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void appendBodyElementText(StringBuffer text, IBodyElement e){
|
public void appendBodyElementText(StringBuffer text, IBodyElement e){
|
||||||
if (e instanceof XWPFParagraph){
|
if (e instanceof XWPFParagraph){
|
||||||
@ -178,6 +176,8 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
|
private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
|
||||||
|
if (hfPolicy == null) return;
|
||||||
|
|
||||||
if(hfPolicy.getFirstPageFooter() != null) {
|
if(hfPolicy.getFirstPageFooter() != null) {
|
||||||
text.append( hfPolicy.getFirstPageFooter().getText() );
|
text.append( hfPolicy.getFirstPageFooter().getText() );
|
||||||
}
|
}
|
||||||
@ -190,6 +190,8 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
|
private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
|
||||||
|
if (hfPolicy == null) return;
|
||||||
|
|
||||||
if(hfPolicy.getFirstPageHeader() != null) {
|
if(hfPolicy.getFirstPageHeader() != null) {
|
||||||
text.append( hfPolicy.getFirstPageHeader().getText() );
|
text.append( hfPolicy.getFirstPageHeader().getText() );
|
||||||
}
|
}
|
||||||
|
@ -351,5 +351,16 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
assertEquals("controlled content loading-"+targ, true, hit);
|
assertEquals("controlled content loading-"+targ, true, hit);
|
||||||
}
|
}
|
||||||
assertEquals("controlled content loading hit count", targs.length, hits);
|
assertEquals("controlled content loading hit count", targs.length, hits);
|
||||||
|
ex.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** No Header or Footer in document */
|
||||||
|
public void testBug55733() throws Exception {
|
||||||
|
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55733.docx");
|
||||||
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
|
|
||||||
|
// Check it gives text without error
|
||||||
|
extractor.getText();
|
||||||
|
extractor.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -45,5 +45,4 @@ public final class TestWordExtractorBugs extends TestCase {
|
|||||||
// Check it gives text without error
|
// Check it gives text without error
|
||||||
extractor.getText();
|
extractor.getText();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
BIN
test-data/document/55733.docx
Normal file
BIN
test-data/document/55733.docx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user