Fix bug #55733 - XWPFWordExtractor needs to handle .docx files with neither headers nor footers

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1538044 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2013-11-01 19:43:46 +00:00
parent 01b276b2f3
commit a6b301e485
4 changed files with 32 additions and 20 deletions

View File

@ -17,7 +17,6 @@
package org.apache.poi.xwpf.extractor; package org.apache.poi.xwpf.extractor;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
import java.util.List; import java.util.List;
import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLDocument;
@ -34,7 +33,6 @@ import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun; import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRelation; import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFSDT; import org.apache.poi.xwpf.usermodel.XWPFSDT;
import org.apache.poi.xwpf.usermodel.XWPFTable; import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell; import org.apache.poi.xwpf.usermodel.XWPFTableCell;
@ -85,24 +83,24 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
System.out.println(extractor.getText()); System.out.println(extractor.getText());
} }
public String getText() { public String getText() {
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy(); XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
// Start out with all headers // Start out with all headers
extractHeaders(text, hfPolicy); extractHeaders(text, hfPolicy);
// body elements // Process all body elements
for (IBodyElement e : document.getBodyElements()){ for (IBodyElement e : document.getBodyElements()){
appendBodyElementText(text, e); appendBodyElementText(text, e);
text.append('\n'); text.append('\n');
} }
// Finish up with all the footers // Finish up with all the footers
extractFooters(text, hfPolicy); extractFooters(text, hfPolicy);
return text.toString(); return text.toString();
} }
public void appendBodyElementText(StringBuffer text, IBodyElement e){ public void appendBodyElementText(StringBuffer text, IBodyElement e){
if (e instanceof XWPFParagraph){ if (e instanceof XWPFParagraph){
@ -178,6 +176,8 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
} }
private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) { private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
if (hfPolicy == null) return;
if(hfPolicy.getFirstPageFooter() != null) { if(hfPolicy.getFirstPageFooter() != null) {
text.append( hfPolicy.getFirstPageFooter().getText() ); text.append( hfPolicy.getFirstPageFooter().getText() );
} }
@ -190,6 +190,8 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
} }
private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) { private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
if (hfPolicy == null) return;
if(hfPolicy.getFirstPageHeader() != null) { if(hfPolicy.getFirstPageHeader() != null) {
text.append( hfPolicy.getFirstPageHeader().getText() ); text.append( hfPolicy.getFirstPageHeader().getText() );
} }

View File

@ -351,5 +351,16 @@ public class TestXWPFWordExtractor extends TestCase {
assertEquals("controlled content loading-"+targ, true, hit); assertEquals("controlled content loading-"+targ, true, hit);
} }
assertEquals("controlled content loading hit count", targs.length, hits); assertEquals("controlled content loading hit count", targs.length, hits);
ex.close();
}
/** No Header or Footer in document */
public void testBug55733() throws Exception {
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55733.docx");
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
// Check it gives text without error
extractor.getText();
extractor.close();
} }
} }

View File

@ -45,5 +45,4 @@ public final class TestWordExtractorBugs extends TestCase {
// Check it gives text without error // Check it gives text without error
extractor.getText(); extractor.getText();
} }
} }

Binary file not shown.