Fix bug #49189 - Detect w:tab and w:cr entries in XWPF paragraphs, even when the XSD is silly and maps them to CTEmpty
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@948199 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
24b73b2cdc
commit
f6c41635e5
@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.7-SNAPSHOT" date="2010-??-??">
|
<release version="3.7-SNAPSHOT" date="2010-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">49189 - Detect w:tab and w:cr entries in XWPF paragraphs, even when the XSD is silly and maps them to CTEmpty</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">49273 - Correct handling for Font Character Sets with indices greater than 127</action>
|
<action dev="POI-DEVELOPERS" type="fix">49273 - Correct handling for Font Character Sets with indices greater than 127</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">49334 - Track the ValueRangeRecords of charts in HSSFChart, to allow the basic axis operations</action>
|
<action dev="POI-DEVELOPERS" type="add">49334 - Track the ValueRangeRecords of charts in HSSFChart, to allow the basic axis operations</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">49242 - Track the LinkDataRecords of charts in HSSFChart</action>
|
<action dev="POI-DEVELOPERS" type="add">49242 - Track the LinkDataRecords of charts in HSSFChart</action>
|
||||||
|
@ -24,6 +24,7 @@ import org.apache.xmlbeans.XmlCursor;
|
|||||||
import org.apache.xmlbeans.XmlObject;
|
import org.apache.xmlbeans.XmlObject;
|
||||||
import org.apache.poi.util.Internal;
|
import org.apache.poi.util.Internal;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
|
||||||
@ -108,6 +109,18 @@ public class XWPFParagraph {
|
|||||||
if (o instanceof CTPTab) {
|
if (o instanceof CTPTab) {
|
||||||
text.append("\t");
|
text.append("\t");
|
||||||
}
|
}
|
||||||
|
if (o instanceof CTEmpty) {
|
||||||
|
// Some inline text elements get returned not as
|
||||||
|
// themselves, but as CTEmpty, owing to some odd
|
||||||
|
// definitions around line 5642 of the XSDs
|
||||||
|
String tagName = o.getDomNode().getNodeName();
|
||||||
|
if ("w:tab".equals(tagName)) {
|
||||||
|
text.append("\t");
|
||||||
|
}
|
||||||
|
if ("w:cr".equals(tagName)) {
|
||||||
|
text.append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
//got a reference to a footnote
|
//got a reference to a footnote
|
||||||
if (o instanceof CTFtnEdnRef) {
|
if (o instanceof CTFtnEdnRef) {
|
||||||
CTFtnEdnRef ftn = (CTFtnEdnRef) o;
|
CTFtnEdnRef ftn = (CTFtnEdnRef) o;
|
||||||
|
@ -219,4 +219,22 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
assertTrue(extractor.getText().contains("2008"));
|
assertTrue(extractor.getText().contains("2008"));
|
||||||
assertTrue(extractor.getText().contains("(120 "));
|
assertTrue(extractor.getText().contains("(120 "));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that we handle things like tabs and
|
||||||
|
* carriage returns properly in the text that
|
||||||
|
* we're extracting (bug #49189)
|
||||||
|
*/
|
||||||
|
public void testDocTabs() {
|
||||||
|
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("WithTabs.docx");
|
||||||
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
|
|
||||||
|
// Check bits
|
||||||
|
assertTrue(extractor.getText().contains("a"));
|
||||||
|
assertTrue(extractor.getText().contains("\t"));
|
||||||
|
assertTrue(extractor.getText().contains("b"));
|
||||||
|
|
||||||
|
// Now check the first paragraph in total
|
||||||
|
assertTrue(extractor.getText().contains("a\tb\n"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
BIN
test-data/document/WithTabs.docx
Normal file
BIN
test-data/document/WithTabs.docx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user