Improve XWPF paragraph text handling (including text in headers/footers), and add further header tests

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@684273 13f79535-47bb-0310-9956-ffa450edef68
2008-08-09 14:50:16 +00:00 · 2008-08-09 14:50:16 +00:00 · 0740316520
commit 0740316520
parent c96a79fdf9
7 changed files with 99 additions and 23 deletions
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@ -37,6 +37,8 @@

 		<!-- Don't forget to update status.xml too! -->
        <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+           <action dev="POI-DEVELOPERS" type="add">Support in XWPF for headers and footers</action>
           <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
           <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
           <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@ -34,6 +34,8 @@
 	<!-- Don't forget to update changes.xml too! -->
    <changes>
        <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+           <action dev="POI-DEVELOPERS" type="add">Support in XWPF for headers and footers</action>
           <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
           <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
           <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
@ -39,7 +39,8 @@ public abstract class XWPFHeaderFooter {
 	 * Returns the paragraph(s) that holds
 	 *  the text of the header or footer.
 	 * Normally there is only the one paragraph, but
-	 *  there could be more in certain cases.
+	 *  there could be more in certain cases, or 
+	 *  a table.
 	 */
 	public XWPFParagraph[] getParagraphs() {
 		XWPFParagraph[] paras = 
@ -51,6 +52,24 @@ public abstract class XWPFHeaderFooter {
 		}
 		return paras;
 	}
+	/**
+	 * Returns the table(s) that hold the text
+	 *  of the header or footer, for complex cases
+	 *  where a paragraph isn't used.
+	 * Normally there's just one paragraph, but some
+	 *  complex headers/footers have a table or two
+	 *  in addition. 
+	 */
+	public XWPFTable[] getTables() {
+		XWPFTable[] tables = 
+			new XWPFTable[headerFooter.getTblArray().length];
+		for(int i=0; i<tables.length; i++) {
+			tables[i] = new XWPFTable(
+					headerFooter.getTblArray(i)
+			);
+		}
+		return tables;
+	}
 	
 	/**
 	 * Returns the textual content of the header/footer,
@ -58,11 +77,21 @@ public abstract class XWPFHeaderFooter {
 	 */
 	public String getText() {
 		StringBuffer t = new StringBuffer();
+		
 		XWPFParagraph[] paras = getParagraphs();
 		for(int i=0; i<paras.length; i++) {
+			if(! paras[i].isEmpty()) {
 				t.append(paras[i].getText());
 				t.append('\n');
 			}
+		}
+		
+		XWPFTable[] tables = getTables();
+		for(int i=0; i<tables.length; i++) {
+			t.append(tables[i].getText());
+			t.append('\n');
+		}
+		
 		return t.toString(); 
 	}
 }
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@ -16,6 +16,8 @@
 ==================================================================== */
 package org.apache.poi.xwpf.usermodel;

+import java.util.ArrayList;
+
 import org.apache.poi.xwpf.XWPFDocument;
 import org.apache.poi.xwpf.model.XMLParagraph;
 import org.apache.xmlbeans.XmlCursor;
@ -24,6 +26,10 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
 import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;
@ -43,15 +49,37 @@ public class XWPFParagraph extends XMLParagraph
    public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
    {
        super(prgrph);
-        
        this.docRef = docRef;
-        CTR[] rs = paragraph.getRArray();
+        
+        // All the runs to loop over
+        // TODO - replace this with some sort of XPath expression
+        //  to directly find all the CTRs, in the right order
+        ArrayList<CTR> rs = new ArrayList<CTR>();
+        CTR[] tmp;
+        
+        // Get the main text runs
+        tmp = paragraph.getRArray();
+        for(int i=0; i<tmp.length; i++) {
+        	rs.add(tmp[i]);
+        }
+        
+        // Structured document tags (w:sdt, e.g. content controls)
+        //  wrap further text runs, so collect those too
+        CTSdtRun[] sdts = paragraph.getSdtArray();
+        for(int i=0; i<sdts.length; i++) {
+        	CTSdtContentRun run = sdts[i].getSdtContent();
+        	tmp = run.getRArray();
+            for(int j=0; j<tmp.length; j++) {
+            	rs.add(tmp[j]);
+            }
+        }
+    
        
        // Get text of the paragraph
-        for (int j = 0; j < rs.length; j++) {
+        for (int j = 0; j < rs.size(); j++) {
            // Grab the text and tabs of the paragraph
        	// Do so in a way that preserves the ordering
-        	XmlCursor c = rs[j].newCursor();
+        	XmlCursor c = rs.get(j).newCursor();
        	c.selectPath( "./*" );
        	while(c.toNextSelection()) {
        		XmlObject o = c.getObject();
@ -65,7 +93,7 @@ public class XWPFParagraph extends XMLParagraph
        	
            // Loop over pictures inside our
            //  paragraph, looking for text in them
-            CTPicture[] picts = rs[j].getPictArray();
+            CTPicture[] picts = rs.get(j).getPictArray();
            for (int k = 0; k < picts.length; k++) {
                XmlObject[] t = picts[k].selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
                for (int m = 0; m < t.length; m++) {
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
@ -32,24 +32,26 @@ public class XWPFTable
 {
    protected StringBuffer text=new StringBuffer(); 
    
-    public XWPFTable(CTTbl table)
-    {
-        for(CTRow row : table.getTrArray())
-        {
-            for(CTTc cell : row.getTcArray())
-            {
-                for(CTP ctp : cell.getPArray())
-                {
+    public XWPFTable(CTTbl table) {
+        for(CTRow row : table.getTrArray()) {
+        	StringBuffer rowText = new StringBuffer();
+            for(CTTc cell : row.getTcArray()) {
+                for(CTP ctp : cell.getPArray()) {
                    XWPFParagraph p = new XWPFParagraph(ctp);
-                    this.text.append(p.getText()+"\t");
+                    if(rowText.length() > 0) {
+                    	rowText.append('\t');
+                    }
+                    rowText.append(p.getText());
                }
            }
-            this.text.append("\n");
+            if(rowText.length() > 0) {
+            	this.text.append(rowText);
+            	this.text.append('\n');
+            }
        }
    }
    
-    public String getText()
-    {
+    public String getText() {
        return text.toString();
    }
 }
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@ -111,7 +111,7 @@ public class TestXWPFWordExtractor extends TestCase {
 		assertTrue(text.length() > 0);
 		
 		char euro = '\u20ac';
-//		System.err.println("'"+text.substring(text.length() - 20) + "'");
+//		System.err.println("'"+text.substring(text.length() - 40) + "'");
 		
 		// Check contents
 		assertTrue(text.startsWith(
@ -121,7 +121,7 @@ public class TestXWPFWordExtractor extends TestCase {
 				"As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
 		));
 		assertTrue(text.endsWith(
-				"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n"
+				"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
 		));
 		
 		// Check number of paragraphs
--- a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
@ -165,7 +165,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
 	public void testContents() throws Exception {
 		XWPFHeaderFooterPolicy policy;
 		
-		// Just test a few bits
+		// Test a few simple bits off a simple header
 		policy = diffFirst.getHeaderFooterPolicy();
 		
 		assertEquals(
@ -176,5 +176,18 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
 				"First header column!\tMid header\tRight header!\n", 
 				policy.getDefaultHeader().getText()
 		);
+		
+		
+		// And a few bits off a more complex header
+		policy = oddEven.getHeaderFooterPolicy();
+		
+		assertEquals(
+			"\n[]ODD Page Header text\n\n",
+			policy.getDefaultHeader().getText()
+		);
+		assertEquals(
+				"\n[This is an Even Page, with a Header]\n\n", 
+				policy.getEvenPageHeader().getText()
+		);
 	}
 }