XWPFWordExtractor does not extract inserted/deleted text, see Bugzilla 47571

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@797740 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2009-07-25 10:41:53 +00:00
parent 47909f9f88
commit 307f443c33
4 changed files with 18 additions and 14 deletions

View File

@ -33,6 +33,7 @@
<changes>
<release version="3.5-beta7" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="fix">47571 - Fixed XWPFWordExtractor to extract inserted/deleted text</action>
<action dev="POI-DEVELOPERS" type="fix">47548 - Fixed RecordFactoryInputStream to properly read continued DrawingRecords</action>
<action dev="POI-DEVELOPERS" type="fix">46419 - Fixed compatibility issue with OpenOffice 3.0</action>
<action dev="POI-DEVELOPERS" type="fix">47559 - Fixed compatibility issue with Excel 2008 Mac sp2</action>

View File

@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
@ -56,23 +57,18 @@ public class XWPFParagraph {
// TODO - replace this with some sort of XPath expression
// to directly find all the CTRs, in the right order
ArrayList<CTR> rs = new ArrayList<CTR>();
CTR[] tmp;
rs.addAll(Arrays.asList(paragraph.getRArray()));
// Get the main text runs
tmp = paragraph.getRArray();
for (int i = 0; i < tmp.length; i++) {
rs.add(tmp[i]);
for (CTSdtRun sdt : paragraph.getSdtArray()) {
CTSdtContentRun run = sdt.getSdtContent();
rs.addAll(Arrays.asList(run.getRArray()));
}
for (CTRunTrackChange c : paragraph.getDelArray()) {
rs.addAll(Arrays.asList(c.getRArray()));
}
// Not sure quite what these are, but they hold
// more text runs
CTSdtRun[] sdts = paragraph.getSdtArray();
for (int i = 0; i < sdts.length; i++) {
CTSdtContentRun run = sdts[i].getSdtContent();
tmp = run.getRArray();
for (int j = 0; j < tmp.length; j++) {
rs.add(tmp[j]);
}
for (CTRunTrackChange c : paragraph.getInsArray()) {
rs.addAll(Arrays.asList(c.getRArray()));
}
// Get text of the paragraph

View File

@ -192,6 +192,13 @@ public class TestXWPFWordExtractor extends TestCase {
assertTrue(extractor.getText().contains("XXX"));
}
/**
 * Checks that text extracted from the "delins.docx" sample contains
 * both expected phrases. Going by the test name, these presumably come
 * from tracked insertions and deletions; which phrase belongs to which
 * kind of change is not visible here -- confirm against the sample file.
 */
public void testInsertedDeletedText() throws Exception {
    XWPFWordExtractor extractor = new XWPFWordExtractor(open("delins.docx"));
    String[] expectedPhrases = { "pendant worn", "extremely well" };
    // The text is re-extracted for every phrase, one check per phrase.
    for (String phrase : expectedPhrases) {
        assertTrue(extractor.getText().contains(phrase));
    }
}
//TODO use the same logic for opening test files as in HSSFTestDataSamples
private XWPFDocument open(String sampleFileName) throws IOException {