diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index ac0604d77..f1dd05950 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,8 @@ + Improve how XWPF handles paragraph text + Support in XWPF handles headers and footers 45592 - Improve XWPF text extraction to include tables always, and picture text where possible 45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text 45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 89114557f..60ad6c092 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,8 @@ + Improve how XWPF handles paragraph text + Support in XWPF handles headers and footers 45592 - Improve XWPF text extraction to include tables always, and picture text where possible 45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text 45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java index 7150014e2..36de22919 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java @@ -39,7 +39,8 @@ public abstract class XWPFHeaderFooter { * Returns the paragraph(s) that holds * the text of the header or footer. * Normally there is only the one paragraph, but - * there could be more in certain cases. + * there could be more in certain cases, or + * a table. */ public XWPFParagraph[] getParagraphs() { XWPFParagraph[] paras = @@ -51,6 +52,24 @@ public abstract class XWPFHeaderFooter { } return paras; } + /** + * Return the table(s) that holds the text + * of the header or footer, for complex cases + * where a paragraph isn't used. + * Normally there's just one paragraph, but some + * complex headers/footers have a table or two + * in addition. + */ + public XWPFTable[] getTables() { + XWPFTable[] tables = + new XWPFTable[headerFooter.getTblArray().length]; + for(int i=0; i rs = new ArrayList(); + CTR[] tmp; + + // Get the main text runs + tmp = paragraph.getRArray(); + for(int i=0; i 0) { + rowText.append('\t'); + } + rowText.append(p.getText()); } } - this.text.append("\n"); + if(rowText.length() > 0) { + this.text.append(rowText); + this.text.append('\n'); + } } } - public String getText() - { + public String getText() { return text.toString(); } } diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index b61af2f06..1b26bb58a 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -111,7 +111,7 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(text.length() > 0); char euro = '\u20ac'; -// System.err.println("'"+text.substring(text.length() - 20) + "'"); +// System.err.println("'"+text.substring(text.length() - 40) + "'"); // Check contents assertTrue(text.startsWith( @@ -121,7 +121,7 @@ public class TestXWPFWordExtractor extends TestCase { "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n" )); assertTrue(text.endsWith( - "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n" + "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n" )); // Check number of paragraphs diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java index b1f697165..b2269c290 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java @@ -165,7 +165,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase { public void testContents() throws Exception { XWPFHeaderFooterPolicy policy; - // Just test a few bits + // Test a few simple bits off a simple header policy = diffFirst.getHeaderFooterPolicy(); assertEquals( @@ -176,5 +176,18 @@ public class TestXWPFHeaderFooterPolicy extends TestCase { "First header column!\tMid header\tRight header!\n", policy.getDefaultHeader().getText() ); + + + // And a few bits off a more complex header + policy = oddEven.getHeaderFooterPolicy(); + + assertEquals( + "\n[]ODD Page Header text\n\n", + policy.getDefaultHeader().getText() + ); + assertEquals( + "\n[This is an Even Page, with a Header]\n\n", + policy.getEvenPageHeader().getText() + ); } }