XWPFWordExtractor does not extract inserted/deleted text, see Bugzilla 45597
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@797740 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
47909f9f88
commit
307f443c33
@ -33,6 +33,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.5-beta7" date="2009-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">47571 - Fixed XWPFWordExtractor to extract inserted/deleted text</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47548 - Fixed RecordFactoryInputStream to properly read continued DrawingRecords</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">46419 - Fixed compatibility issue with OpenOffice 3.0</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47559 - Fixed compatibility issue with Excel 2008 Mac sp2</action>
|
||||
|
@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.xmlbeans.XmlCursor;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
@ -56,23 +57,18 @@ public class XWPFParagraph {
|
||||
// TODO - replace this with some sort of XPath expression
|
||||
// to directly find all the CTRs, in the right order
|
||||
ArrayList<CTR> rs = new ArrayList<CTR>();
|
||||
CTR[] tmp;
|
||||
rs.addAll(Arrays.asList(paragraph.getRArray()));
|
||||
|
||||
// Get the main text runs
|
||||
tmp = paragraph.getRArray();
|
||||
for (int i = 0; i < tmp.length; i++) {
|
||||
rs.add(tmp[i]);
|
||||
for (CTSdtRun sdt : paragraph.getSdtArray()) {
|
||||
CTSdtContentRun run = sdt.getSdtContent();
|
||||
rs.addAll(Arrays.asList(run.getRArray()));
|
||||
}
|
||||
for (CTRunTrackChange c : paragraph.getDelArray()) {
|
||||
rs.addAll(Arrays.asList(c.getRArray()));
|
||||
}
|
||||
|
||||
// Not sure quite what these are, but they hold
|
||||
// more text runs
|
||||
CTSdtRun[] sdts = paragraph.getSdtArray();
|
||||
for (int i = 0; i < sdts.length; i++) {
|
||||
CTSdtContentRun run = sdts[i].getSdtContent();
|
||||
tmp = run.getRArray();
|
||||
for (int j = 0; j < tmp.length; j++) {
|
||||
rs.add(tmp[j]);
|
||||
}
|
||||
for (CTRunTrackChange c : paragraph.getInsArray()) {
|
||||
rs.addAll(Arrays.asList(c.getRArray()));
|
||||
}
|
||||
|
||||
// Get text of the paragraph
|
||||
|
@ -192,6 +192,13 @@ public class TestXWPFWordExtractor extends TestCase {
|
||||
assertTrue(extractor.getText().contains("XXX"));
|
||||
}
|
||||
|
||||
/**
 * Opens the sample document "delins.docx", runs it through
 * XWPFWordExtractor, and checks that the extracted text contains
 * both "pendant worn" and "extremely well".
 * NOTE(review): presumably one phrase sits in a tracked insertion and
 * the other in a tracked deletion - confirm against the sample file.
 */
public void testInsertedDeletedText() throws Exception {
|
||||
XWPFDocument doc = open("delins.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
assertTrue(extractor.getText().contains("pendant worn"));
|
||||
assertTrue(extractor.getText().contains("extremely well"));
|
||||
}
|
||||
|
||||
//TODO use the same logic for opening test files as in HSSFTestDataSamples
|
||||
private XWPFDocument open(String sampleFileName) throws IOException {
|
||||
|
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user