[Bug-61354] fix issue with extracting text from Word docs. This closes #66
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1803250 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e1f37388fc
commit
9f28e36438
1
.gitignore
vendored
1
.gitignore
vendored
@ -45,3 +45,4 @@ sonar/*/target
|
||||
.ant-targets-build.xml
|
||||
build
|
||||
dist
|
||||
lib/
|
||||
|
@ -156,26 +156,34 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
|
||||
|
||||
// parse the document with cursor and add
|
||||
// the XmlObject to its lists
|
||||
XmlCursor cursor = ctDocument.getBody().newCursor();
|
||||
cursor.selectPath("./*");
|
||||
while (cursor.toNextSelection()) {
|
||||
XmlObject o = cursor.getObject();
|
||||
if (o instanceof CTP) {
|
||||
XWPFParagraph p = new XWPFParagraph((CTP) o, this);
|
||||
bodyElements.add(p);
|
||||
paragraphs.add(p);
|
||||
} else if (o instanceof CTTbl) {
|
||||
XWPFTable t = new XWPFTable((CTTbl) o, this);
|
||||
bodyElements.add(t);
|
||||
tables.add(t);
|
||||
} else if (o instanceof CTSdtBlock) {
|
||||
XWPFSDT c = new XWPFSDT((CTSdtBlock) o, this);
|
||||
bodyElements.add(c);
|
||||
contentControls.add(c);
|
||||
XmlCursor docCursor = ctDocument.newCursor();
|
||||
docCursor.selectPath("./*");
|
||||
while (docCursor.toNextSelection()) {
|
||||
XmlObject o = docCursor.getObject();
|
||||
if (o instanceof CTBody) {
|
||||
XmlCursor bodyCursor = o.newCursor();
|
||||
bodyCursor.selectPath("./*");
|
||||
while (bodyCursor.toNextSelection()) {
|
||||
XmlObject bodyObj = bodyCursor.getObject();
|
||||
if (bodyObj instanceof CTP) {
|
||||
XWPFParagraph p = new XWPFParagraph((CTP) bodyObj,
|
||||
this);
|
||||
bodyElements.add(p);
|
||||
paragraphs.add(p);
|
||||
} else if (bodyObj instanceof CTTbl) {
|
||||
XWPFTable t = new XWPFTable((CTTbl) bodyObj, this);
|
||||
bodyElements.add(t);
|
||||
tables.add(t);
|
||||
} else if (bodyObj instanceof CTSdtBlock) {
|
||||
XWPFSDT c = new XWPFSDT((CTSdtBlock) bodyObj, this);
|
||||
bodyElements.add(c);
|
||||
contentControls.add(c);
|
||||
}
|
||||
}
|
||||
bodyCursor.dispose();
|
||||
}
|
||||
}
|
||||
cursor.dispose();
|
||||
|
||||
docCursor.dispose();
|
||||
// Sort out headers and footers
|
||||
if (doc.getDocument().getBody().getSectPr() != null)
|
||||
headerFooterPolicy = new XWPFHeaderFooterPolicy(this);
|
||||
|
@ -411,4 +411,14 @@ public class TestXWPFWordExtractor extends TestCase {
|
||||
"In Sequence:\n|X||_||X|\n", extractor.getText());
|
||||
extractor.close();
|
||||
}
|
||||
|
||||
public void testMultipleBodyBug() throws IOException {
|
||||
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("MultipleBodyBug.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
assertEquals("START BODY 1 The quick, brown fox jumps over a lazy dog. END BODY 1.\n"
|
||||
+ "START BODY 2 The quick, brown fox jumps over a lazy dog. END BODY 2.\n"
|
||||
+ "START BODY 3 The quick, brown fox jumps over a lazy dog. END BODY 3.\n",
|
||||
extractor.getText());
|
||||
extractor.close();
|
||||
}
|
||||
}
|
||||
|
BIN
test-data/document/MultipleBodyBug.docx
Normal file
BIN
test-data/document/MultipleBodyBug.docx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user