Patch from bug #45592 - improve XWPF text extraction
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@684219 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
beb26b5c76
commit
2d50615c87
@ -37,6 +37,7 @@
|
||||
|
||||
<!-- Don't forget to update status.xml too! -->
|
||||
<release version="3.5.1-beta2" date="2008-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
||||
|
@ -34,6 +34,7 @@
|
||||
<!-- Don't forget to update changes.xml too! -->
|
||||
<changes>
|
||||
<release version="3.5.1-beta2" date="2008-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
||||
|
@ -89,6 +89,20 @@ public class XWPFParagraph extends XMLParagraph
|
||||
* including text from pictures in it.
|
||||
*/
|
||||
public String getText() {
|
||||
return getParagraphText() + getPictureText();
|
||||
}
|
||||
/**
|
||||
* Returns the text of the paragraph, but not
|
||||
* of any objects in the paragraph
|
||||
*/
|
||||
public String getParagraphText() {
|
||||
return text.toString();
|
||||
}
|
||||
/**
|
||||
* Returns any text from any suitable
|
||||
* pictures in the paragraph
|
||||
*/
|
||||
public String getPictureText() {
|
||||
return pictureText.toString();
|
||||
}
|
||||
}
|
||||
|
@ -117,9 +117,12 @@ public class TestXWPFWordExtractor extends TestCase {
|
||||
assertTrue(text.startsWith(
|
||||
" \n(V) ILLUSTRATIVE CASES\n\n"
|
||||
));
|
||||
assertTrue(text.endsWith(
|
||||
assertTrue(text.contains(
|
||||
"As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
|
||||
));
|
||||
assertTrue(text.endsWith(
|
||||
"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n"
|
||||
));
|
||||
|
||||
// Check number of paragraphs
|
||||
int ps = 0;
|
||||
@ -127,7 +130,7 @@ public class TestXWPFWordExtractor extends TestCase {
|
||||
for (int i = 0; i < t.length; i++) {
|
||||
if(t[i] == '\n') { ps++; }
|
||||
}
|
||||
assertEquals(79, ps);
|
||||
assertEquals(103, ps);
|
||||
}
|
||||
|
||||
public void testGetWithHyperlinks() throws Exception {
|
||||
|
Loading…
Reference in New Issue
Block a user