hwpf: include any character data that precedes the first paragraph in that first paragraph
(workaround for bug #48075)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982238 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8486ae97d3
commit
e52778847c
@ -101,6 +101,14 @@ public class Paragraph extends Range implements Cloneable {
|
|||||||
_istd = papx.getIstd();
|
_istd = papx.getIstd();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a paragraph whose start offset is supplied by the caller.
 * <p>
 * The range passed to the superclass starts at the larger of
 * {@code parent._start} and {@code start}, and ends at the smaller of
 * {@code parent._end} and {@code papx.getEnd()}, so it is always clamped
 * to the parent range. Per the commit message, this is a workaround for
 * bug #48075 that lets character data located before the first paragraph
 * be included in that paragraph.
 *
 * @param papx   source of the end offset, paragraph properties, SPRM
 *               buffer and style index (istd)
 * @param parent enclosing range; also bounds the new paragraph's range
 * @param start  requested start offset, before clamping to
 *               {@code parent._start}
 */
protected Paragraph(PAPX papx, Range parent, int start)
|
||||||
|
{
|
||||||
|
super(Math.max(parent._start, start), Math.min(parent._end, papx.getEnd()), parent);
|
||||||
|
_props = papx.getParagraphProperties(_doc.getStyleSheet());
|
||||||
|
_papx = papx.getSprmBuf();
|
||||||
|
_istd = papx.getIstd();
|
||||||
|
}
|
||||||
|
|
||||||
public short getStyleIndex()
|
public short getStyleIndex()
|
||||||
{
|
{
|
||||||
return _istd;
|
return _istd;
|
||||||
|
@ -830,7 +830,11 @@ public class Range { // TODO -instantiable superclass
|
|||||||
if (props.getIlfo() > 0) {
|
if (props.getIlfo() > 0) {
|
||||||
pap = new ListEntry(papx, this, _doc.getListTables());
|
pap = new ListEntry(papx, this, _doc.getListTables());
|
||||||
} else {
|
} else {
|
||||||
pap = new Paragraph(papx, this);
|
if (((index + _parStart)==0) && papx.getStart()>0) {
|
||||||
|
pap = new Paragraph(papx, this, 0);
|
||||||
|
} else {
|
||||||
|
pap = new Paragraph(papx, this);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return pap;
|
return pap;
|
||||||
|
@ -298,4 +298,14 @@ public final class TestWordExtractor extends TestCase {
|
|||||||
assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
|
assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
|
||||||
assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
|
assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Regression test for the first-paragraph workaround (bug #48075 per the
 * commit message): opens the sample document {@code MBD001D0B89.doc},
 * extracts its text, and checks that the text starts with the
 * Cyrillic word spelled by the escaped literal below.
 *
 * @throws Exception if opening or reading the sample document fails
 */
public void testFirstParagraphFix() throws Exception {
|
||||||
|
extractor = new WordExtractor(
|
||||||
|
POIDataSamples.getDocumentInstance().openResourceAsStream("MBD001D0B89.doc")
|
||||||
|
);
|
||||||
|
|
||||||
|
String text = extractor.getText();
|
||||||
|
|
||||||
|
assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
BIN
test-data/document/MBD001D0B89.doc
Normal file
BIN
test-data/document/MBD001D0B89.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user