Fix bug #49189 - Detect w:tab and w:cr entries in XWPF paragraphs, even when the XSD is silly and maps them to CTEmpty

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@948199 13f79535-47bb-0310-9956-ffa450edef68
2010-05-25 20:31:42 +00:00 · 2010-05-25 20:31:42 +00:00 · f6c41635e5
commit f6c41635e5
parent 24b73b2cdc
4 changed files with 32 additions and 0 deletions
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@ -34,6 +34,7 @@
    <changes>
        <release version="3.7-SNAPSHOT" date="2010-??-??">
           <action dev="POI-DEVELOPERS" type="fix">49189 - Detect w:tab and w:cr entries in XWPF paragraphs, even when the XSD is silly and maps them to CTEmpty</action>
           <action dev="POI-DEVELOPERS" type="fix">49273 - Correct handling for Font Character Sets with indices greater than 127</action>
           <action dev="POI-DEVELOPERS" type="add">49334 - Track the ValueRangeRecords of charts in HSSFChart, to allow the basic axis operations</action>
           <action dev="POI-DEVELOPERS" type="add">49242 - Track the LinkDataRecords of charts in HSSFChart</action>
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@ -24,6 +24,7 @@ import org.apache.xmlbeans.XmlCursor;
 import org.apache.xmlbeans.XmlObject;
 import org.apache.poi.util.Internal;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
@ -108,6 +109,18 @@ public class XWPFParagraph {
                    if (o instanceof CTPTab) {
                        text.append("\t");
                    }
                    if (o instanceof CTEmpty) {
                       // Some inline text elements get returned not as
                       //  themselves, but as CTEmpty, owing to some odd
                       //  definitions around line 5642 of the XSDs
                       String tagName = o.getDomNode().getNodeName();
                       if ("w:tab".equals(tagName)) {
                          text.append("\t");
                       }
                       if ("w:cr".equals(tagName)) {
                          text.append("\n");
                       }
                    }
                    //got a reference to a footnote
                    if (o instanceof CTFtnEdnRef) {
                        CTFtnEdnRef ftn = (CTFtnEdnRef) o;
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@ -219,4 +219,22 @@ public class TestXWPFWordExtractor extends TestCase {
        assertTrue(extractor.getText().contains("2008"));
        assertTrue(extractor.getText().contains("(120 "));
    }
    /**
     * Checks that tabs and carriage returns are carried
     *  through into the text produced by the extractor,
     *  using the WithTabs.docx sample (bug #49189)
     */
    public void testDocTabs() {
       XWPFDocument tabbedDoc = XWPFTestDataSamples.openSampleDocument("WithTabs.docx");
       XWPFWordExtractor wordExtractor = new XWPFWordExtractor(tabbedDoc);

       // Each individual piece must show up somewhere in the text
       for (String piece : new String[] { "a", "\t", "b" }) {
          assertTrue(wordExtractor.getText().contains(piece));
       }

       // And the first paragraph must come out intact, with the
       //  tab between the letters and the line ending after them
       assertTrue(wordExtractor.getText().contains("a\tb\n"));
    }
 }
--- a/test-data/document/WithTabs.docx
+++ b/test-data/document/WithTabs.docx