diff --git a/.gitignore b/.gitignore index 2ebc54416..103731746 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,4 @@ sonar/*/target .ant-targets-build.xml build dist +lib/ diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java index 081b751cd..01463c64a 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java @@ -156,26 +156,34 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody { // parse the document with cursor and add // the XmlObject to its lists - XmlCursor cursor = ctDocument.getBody().newCursor(); - cursor.selectPath("./*"); - while (cursor.toNextSelection()) { - XmlObject o = cursor.getObject(); - if (o instanceof CTP) { - XWPFParagraph p = new XWPFParagraph((CTP) o, this); - bodyElements.add(p); - paragraphs.add(p); - } else if (o instanceof CTTbl) { - XWPFTable t = new XWPFTable((CTTbl) o, this); - bodyElements.add(t); - tables.add(t); - } else if (o instanceof CTSdtBlock) { - XWPFSDT c = new XWPFSDT((CTSdtBlock) o, this); - bodyElements.add(c); - contentControls.add(c); + XmlCursor docCursor = ctDocument.newCursor(); + docCursor.selectPath("./*"); + while (docCursor.toNextSelection()) { + XmlObject o = docCursor.getObject(); + if (o instanceof CTBody) { + XmlCursor bodyCursor = o.newCursor(); + bodyCursor.selectPath("./*"); + while (bodyCursor.toNextSelection()) { + XmlObject bodyObj = bodyCursor.getObject(); + if (bodyObj instanceof CTP) { + XWPFParagraph p = new XWPFParagraph((CTP) bodyObj, + this); + bodyElements.add(p); + paragraphs.add(p); + } else if (bodyObj instanceof CTTbl) { + XWPFTable t = new XWPFTable((CTTbl) bodyObj, this); + bodyElements.add(t); + tables.add(t); + } else if (bodyObj instanceof CTSdtBlock) { + XWPFSDT c = new XWPFSDT((CTSdtBlock) bodyObj, this); + bodyElements.add(c); + contentControls.add(c); + } + } + bodyCursor.dispose(); } } - cursor.dispose(); - + docCursor.dispose(); // Sort out headers and footers if (doc.getDocument().getBody().getSectPr() != null) headerFooterPolicy = new XWPFHeaderFooterPolicy(this); diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index f8e4e4941..b83b27d73 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -411,4 +411,14 @@ public class TestXWPFWordExtractor extends TestCase { "In Sequence:\n|X||_||X|\n", extractor.getText()); extractor.close(); } + + public void testMultipleBodyBug() throws IOException { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("MultipleBodyBug.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + assertEquals("START BODY 1 The quick, brown fox jumps over a lazy dog. END BODY 1.\n" + + "START BODY 2 The quick, brown fox jumps over a lazy dog. END BODY 2.\n" + + "START BODY 3 The quick, brown fox jumps over a lazy dog. END BODY 3.\n", + extractor.getText()); + extractor.close(); + } } diff --git a/test-data/document/MultipleBodyBug.docx b/test-data/document/MultipleBodyBug.docx new file mode 100644 index 000000000..84f795b26 Binary files /dev/null and b/test-data/document/MultipleBodyBug.docx differ