hwpf: prepend any character data that precedes the first paragraph to the first paragraph

(workaround for bug#48075)


git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982238 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Maxim Valyanskiy 2010-08-04 12:43:58 +00:00
parent 8486ae97d3
commit e52778847c
4 changed files with 23 additions and 1 deletions

View File

@ -101,6 +101,14 @@ public class Paragraph extends Range implements Cloneable {
_istd = papx.getIstd(); _istd = papx.getIstd();
} }
/**
 * Builds a paragraph from the given PAPX using an explicitly supplied start
 * offset.
 * <p>
 * The resulting range is clamped to the parent: it begins at the larger of
 * {@code parent._start} and {@code start}, and it ends at the smaller of
 * {@code parent._end} and {@code papx.getEnd()}.
 *
 * @param papx   supplies the end offset, the paragraph properties, the SPRM
 *               buffer and the style index of this paragraph
 * @param parent enclosing range; its bounds limit this paragraph and it is
 *               handed to the superclass constructor as the parent
 * @param start  requested start offset of this paragraph
 */
protected Paragraph(PAPX papx, Range parent, int start) {
    super(
        Math.max(parent._start, start),
        Math.min(parent._end, papx.getEnd()),
        parent);
    // _doc is only usable once the superclass constructor has run.
    this._props = papx.getParagraphProperties(this._doc.getStyleSheet());
    this._papx = papx.getSprmBuf();
    this._istd = papx.getIstd();
}
public short getStyleIndex() public short getStyleIndex()
{ {
return _istd; return _istd;

View File

@ -830,7 +830,11 @@ public class Range { // TODO -instantiable superclass
if (props.getIlfo() > 0) { if (props.getIlfo() > 0) {
pap = new ListEntry(papx, this, _doc.getListTables()); pap = new ListEntry(papx, this, _doc.getListTables());
} else { } else {
pap = new Paragraph(papx, this); if (((index + _parStart)==0) && papx.getStart()>0) {
pap = new Paragraph(papx, this, 0);
} else {
pap = new Paragraph(papx, this);
}
} }
return pap; return pap;

View File

@ -298,4 +298,14 @@ public final class TestWordExtractor extends TestCase {
assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425")); assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423")); assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
} }
/**
 * Verifies that the text extracted from the sample document
 * {@code MBD001D0B89.doc} starts with the expected Cyrillic word, i.e. that
 * the leading text of the document is not dropped by the extractor.
 *
 * @throws Exception if the sample document cannot be opened or parsed
 */
public void testFirstParagraphFix() throws Exception {
    final String sampleName = "MBD001D0B89.doc";
    final String expectedPrefix = "\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435";

    extractor = new WordExtractor(
        POIDataSamples.getDocumentInstance().openResourceAsStream(sampleName));

    assertTrue(extractor.getText().startsWith(expectedPrefix));
}
} }

Binary file not shown.