hwpf: append any character data that precedes the first paragraph to the first paragraph
(workaround for bug #48075)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982238 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8486ae97d3
commit
e52778847c
@ -101,6 +101,14 @@ public class Paragraph extends Range implements Cloneable {
|
||||
_istd = papx.getIstd();
|
||||
}
|
||||
|
||||
/**
 * Creates a paragraph whose range starts at a caller-supplied offset rather
 * than at the start position stored in the PAPX.
 *
 * <p>The resulting range is clamped to the parent: it begins at the larger of
 * {@code parent._start} and {@code start}, and ends at the smaller of
 * {@code parent._end} and {@code papx.getEnd()}.</p>
 *
 * <p>NOTE(review): this overload appears to exist so the first paragraph can
 * be widened to cover character data that precedes it (bug #48075) - confirm
 * against the caller in {@code Range}.</p>
 *
 * @param papx   paragraph property run supplying the end offset, the
 *               paragraph properties, the SPRM buffer and the style index
 * @param parent enclosing range; bounds the new paragraph's extent
 * @param start  requested start offset of the paragraph
 */
protected Paragraph(PAPX papx, Range parent, int start) {
    // The explicit superclass call must remain the first statement.
    super(
        Math.max(parent._start, start),
        Math.min(parent._end, papx.getEnd()),
        parent);

    // Properties are resolved against the owning document's style sheet.
    this._props = papx.getParagraphProperties(this._doc.getStyleSheet());
    this._papx = papx.getSprmBuf();
    this._istd = papx.getIstd();
}
|
||||
|
||||
public short getStyleIndex()
|
||||
{
|
||||
return _istd;
|
||||
|
@ -830,7 +830,11 @@ public class Range { // TODO -instantiable superclass
|
||||
if (props.getIlfo() > 0) {
|
||||
pap = new ListEntry(papx, this, _doc.getListTables());
|
||||
} else {
|
||||
pap = new Paragraph(papx, this);
|
||||
if (((index + _parStart)==0) && papx.getStart()>0) {
|
||||
pap = new Paragraph(papx, this, 0);
|
||||
} else {
|
||||
pap = new Paragraph(papx, this);
|
||||
}
|
||||
}
|
||||
|
||||
return pap;
|
||||
|
@ -298,4 +298,14 @@ public final class TestWordExtractor extends TestCase {
|
||||
assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
|
||||
assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
|
||||
}
|
||||
|
||||
public void testFirstParagraphFix() throws Exception {
|
||||
extractor = new WordExtractor(
|
||||
POIDataSamples.getDocumentInstance().openResourceAsStream("MBD001D0B89.doc")
|
||||
);
|
||||
|
||||
String text = extractor.getText();
|
||||
|
||||
assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
|
||||
}
|
||||
}
|
||||
|
BIN
test-data/document/MBD001D0B89.doc
Normal file
BIN
test-data/document/MBD001D0B89.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user