Patch from bug #45592 - improve XWPF text extraction
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@684219 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
beb26b5c76
commit
2d50615c87
@ -37,6 +37,7 @@
|
|||||||
|
|
||||||
<!-- Don't forget to update status.xml too! -->
|
<!-- Don't forget to update status.xml too! -->
|
||||||
<release version="3.5.1-beta2" date="2008-??-??">
|
<release version="3.5.1-beta2" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
<!-- Don't forget to update changes.xml too! -->
|
<!-- Don't forget to update changes.xml too! -->
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.5.1-beta2" date="2008-??-??">
|
<release version="3.5.1-beta2" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
<action dev="POI-DEVELOPERS" type="add">45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning</action>
|
||||||
|
@ -89,6 +89,20 @@ public class XWPFParagraph extends XMLParagraph
|
|||||||
* including text from pictures in it.
|
* including text from pictures in it.
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
|
return getParagraphText() + getPictureText();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Returns the text of the paragraph, but not
|
||||||
|
* of any objects in the paragraph
|
||||||
|
*/
|
||||||
|
public String getParagraphText() {
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Returns any text from any suitable
|
||||||
|
* pictures in the paragraph
|
||||||
|
*/
|
||||||
|
public String getPictureText() {
|
||||||
|
return pictureText.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -117,9 +117,12 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
assertTrue(text.startsWith(
|
assertTrue(text.startsWith(
|
||||||
" \n(V) ILLUSTRATIVE CASES\n\n"
|
" \n(V) ILLUSTRATIVE CASES\n\n"
|
||||||
));
|
));
|
||||||
assertTrue(text.endsWith(
|
assertTrue(text.contains(
|
||||||
"As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
|
"As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
|
||||||
));
|
));
|
||||||
|
assertTrue(text.endsWith(
|
||||||
|
"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n"
|
||||||
|
));
|
||||||
|
|
||||||
// Check number of paragraphs
|
// Check number of paragraphs
|
||||||
int ps = 0;
|
int ps = 0;
|
||||||
@ -127,7 +130,7 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
for (int i = 0; i < t.length; i++) {
|
for (int i = 0; i < t.length; i++) {
|
||||||
if(t[i] == '\n') { ps++; }
|
if(t[i] == '\n') { ps++; }
|
||||||
}
|
}
|
||||||
assertEquals(79, ps);
|
assertEquals(103, ps);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetWithHyperlinks() throws Exception {
|
public void testGetWithHyperlinks() throws Exception {
|
||||||
|
Loading…
Reference in New Issue
Block a user