hwpf: append any character data before paragraphs to first paragraph

(workaround for bug#48075)


git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982238 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Maxim Valyanskiy 2010-08-04 12:43:58 +00:00
parent 8486ae97d3
commit e52778847c
4 changed files with 23 additions and 1 deletions

View File

@ -101,6 +101,14 @@ public class Paragraph extends Range implements Cloneable {
_istd = papx.getIstd();
}
/**
 * Creates a paragraph whose start offset is supplied by the caller rather
 * than being taken from the PAPX itself.
 *
 * The resulting range begins at the larger of {@code parent._start} and
 * {@code start}, and ends at the smaller of {@code parent._end} and
 * {@code papx.getEnd()}, so it never extends outside the parent range.
 *
 * NOTE(review): per the accompanying commit message this exists as a
 * workaround for bug #48075, letting the first paragraph also cover
 * character data located before the PAPX start - confirm against callers.
 *
 * @param papx   paragraph property run providing the end offset, the
 *               paragraph properties, the sprm buffer and the style index
 * @param parent range that bounds the new paragraph
 * @param start  requested start offset, clamped to {@code parent._start}
 */
protected Paragraph(PAPX papx, Range parent, int start)
{
// Clamp both bounds to the parent range; the start comes from the caller, the end from the PAPX.
super(Math.max(parent._start, start), Math.min(parent._end, papx.getEnd()), parent);
// Paragraph properties are obtained from the PAPX using the document's style sheet.
_props = papx.getParagraphProperties(_doc.getStyleSheet());
_papx = papx.getSprmBuf();
_istd = papx.getIstd();
}
public short getStyleIndex()
{
return _istd;

View File

@ -829,8 +829,12 @@ public class Range { // TODO -instantiable superclass
Paragraph pap = null;
if (props.getIlfo() > 0) {
pap = new ListEntry(papx, this, _doc.getListTables());
} else {
if (((index + _parStart)==0) && papx.getStart()>0) {
pap = new Paragraph(papx, this, 0);
} else {
pap = new Paragraph(papx, this);
}
}
return pap;

View File

@ -298,4 +298,14 @@ public final class TestWordExtractor extends TestCase {
assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
}
/**
 * Checks that text extracted from the sample document "MBD001D0B89.doc"
 * begins with the expected Cyrillic word, i.e. that the leading characters
 * of the document are present at the very start of the extractor output.
 */
public void testFirstParagraphFix() throws Exception {
// Cyrillic word the extracted text is expected to begin with.
final String expectedPrefix = "\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435";
extractor = new WordExtractor(POIDataSamples.getDocumentInstance().openResourceAsStream("MBD001D0B89.doc"));
final String extracted = extractor.getText();
assertTrue(extracted.startsWith(expectedPrefix));
}
}

Binary file not shown.