XWPFWordExtractor does not extract inserted/deleted text, see Bugzilla 47571

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@797740 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2009-07-25 10:41:53 +00:00
parent 47909f9f88
commit 307f443c33
4 changed files with 18 additions and 14 deletions

View File

@ -33,6 +33,7 @@
<changes> <changes>
<release version="3.5-beta7" date="2009-??-??"> <release version="3.5-beta7" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="fix">47571 - Fixed XWPFWordExtractor to extract inserted/deleted text</action>
<action dev="POI-DEVELOPERS" type="fix">47548 - Fixed RecordFactoryInputStream to properly read continued DrawingRecords</action> <action dev="POI-DEVELOPERS" type="fix">47548 - Fixed RecordFactoryInputStream to properly read continued DrawingRecords</action>
<action dev="POI-DEVELOPERS" type="fix">46419 - Fixed compatibility issue with OpenOffice 3.0</action> <action dev="POI-DEVELOPERS" type="fix">46419 - Fixed compatibility issue with OpenOffice 3.0</action>
<action dev="POI-DEVELOPERS" type="fix">47559 - Fixed compatibility issue with Excel 2008 Mac sp2</action> <action dev="POI-DEVELOPERS" type="fix">47559 - Fixed compatibility issue with Excel 2008 Mac sp2</action>

View File

@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import org.apache.xmlbeans.XmlCursor; import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject; import org.apache.xmlbeans.XmlObject;
@ -56,23 +57,18 @@ public class XWPFParagraph {
// TODO - replace this with some sort of XPath expression // TODO - replace this with some sort of XPath expression
// to directly find all the CTRs, in the right order // to directly find all the CTRs, in the right order
ArrayList<CTR> rs = new ArrayList<CTR>(); ArrayList<CTR> rs = new ArrayList<CTR>();
CTR[] tmp; rs.addAll(Arrays.asList(paragraph.getRArray()));
// Get the main text runs for (CTSdtRun sdt : paragraph.getSdtArray()) {
tmp = paragraph.getRArray(); CTSdtContentRun run = sdt.getSdtContent();
for (int i = 0; i < tmp.length; i++) { rs.addAll(Arrays.asList(run.getRArray()));
rs.add(tmp[i]); }
for (CTRunTrackChange c : paragraph.getDelArray()) {
rs.addAll(Arrays.asList(c.getRArray()));
} }
// Not sure quite what these are, but they hold for (CTRunTrackChange c : paragraph.getInsArray()) {
// more text runs rs.addAll(Arrays.asList(c.getRArray()));
CTSdtRun[] sdts = paragraph.getSdtArray();
for (int i = 0; i < sdts.length; i++) {
CTSdtContentRun run = sdts[i].getSdtContent();
tmp = run.getRArray();
for (int j = 0; j < tmp.length; j++) {
rs.add(tmp[j]);
}
} }
// Get text of the paragraph // Get text of the paragraph

View File

@ -192,6 +192,13 @@ public class TestXWPFWordExtractor extends TestCase {
assertTrue(extractor.getText().contains("XXX")); assertTrue(extractor.getText().contains("XXX"));
} }
/**
 * Checks that the text extracted from the "delins.docx" sample contains
 * the phrases "pendant worn" and "extremely well".
 * NOTE(review): judging by the test name, these phrases presumably live in
 * tracked insertions/deletions in the sample document - confirm against the file.
 */
public void testInsertedDeletedText() throws Exception {
XWPFDocument doc = open("delins.docx");
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
assertTrue(extractor.getText().contains("pendant worn"));
assertTrue(extractor.getText().contains("extremely well"));
}
//TODO use the same logic for opening test files as in HSSFTestDataSamples //TODO use the same logic for opening test files as in HSSFTestDataSamples
private XWPFDocument open(String sampleFileName) throws IOException { private XWPFDocument open(String sampleFileName) throws IOException {