Fix bug #49189 - Detect w:tab and w:cr entries in XWPF paragraphs, even when the XSD is silly and maps them to CTEmpty
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@948199 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
24b73b2cdc
commit
f6c41635e5
@ -34,6 +34,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.7-SNAPSHOT" date="2010-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">49189 - Detect w:tab and w:cr entries in XWPF paragraphs, even when the XSD is silly and maps them to CTEmpty</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49273 - Correct handling for Font Character Sets with indices greater than 127</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">49334 - Track the ValueRangeRecords of charts in HSSFChart, to allow the basic axis operations</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">49242 - Track the LinkDataRecords of charts in HSSFChart</action>
|
||||
|
@ -24,6 +24,7 @@ import org.apache.xmlbeans.XmlCursor;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
|
||||
@ -108,6 +109,18 @@ public class XWPFParagraph {
|
||||
if (o instanceof CTPTab) {
|
||||
text.append("\t");
|
||||
}
|
||||
if (o instanceof CTEmpty) {
|
||||
// Some inline text elements get returned not as
|
||||
// themselves, but as CTEmpty, owing to some odd
|
||||
// definitions around line 5642 of the XSDs
|
||||
String tagName = o.getDomNode().getNodeName();
|
||||
if ("w:tab".equals(tagName)) {
|
||||
text.append("\t");
|
||||
}
|
||||
if ("w:cr".equals(tagName)) {
|
||||
text.append("\n");
|
||||
}
|
||||
}
|
||||
//got a reference to a footnote
|
||||
if (o instanceof CTFtnEdnRef) {
|
||||
CTFtnEdnRef ftn = (CTFtnEdnRef) o;
|
||||
|
@ -219,4 +219,22 @@ public class TestXWPFWordExtractor extends TestCase {
|
||||
assertTrue(extractor.getText().contains("2008"));
|
||||
assertTrue(extractor.getText().contains("(120 "));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we handle things like tabs and
|
||||
* carriage returns properly in the text that
|
||||
* we're extracting (bug #49189)
|
||||
*/
|
||||
public void testDocTabs() {
|
||||
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("WithTabs.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
// Check bits
|
||||
assertTrue(extractor.getText().contains("a"));
|
||||
assertTrue(extractor.getText().contains("\t"));
|
||||
assertTrue(extractor.getText().contains("b"));
|
||||
|
||||
// Now check the first paragraph in total
|
||||
assertTrue(extractor.getText().contains("a\tb\n"));
|
||||
}
|
||||
}
|
||||
|
BIN
test-data/document/WithTabs.docx
Normal file
BIN
test-data/document/WithTabs.docx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user