Improve how XWPFParagraph works, so it is child order aware

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@684239 13f79535-47bb-0310-9956-ffa450edef68
2008-08-09 12:15:32 +00:00 · 2008-08-09 12:15:32 +00:00 · de3eb33bd3
commit de3eb33bd3
parent 15cced0478
4 changed files with 161 additions and 12 deletions
--- a/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java
@ -159,10 +159,14 @@ public class XWPFDocument extends POIXMLDocument {
 		return wordDoc.getDocument();
 	}
 	
-	public Iterator<XWPFParagraph> getParagraphsIterator()
-	{
+	public Iterator<XWPFParagraph> getParagraphsIterator() {
 		return paragraphs.iterator();
 	}
+	/** Returns the paragraphs held by this document, copied into a newly allocated array */
+	public XWPFParagraph[] getParagraphs() {
+		XWPFParagraph[] copy = new XWPFParagraph[paragraphs.size()];
+		return paragraphs.toArray(copy);
+	}
 	
 	public Iterator<XWPFTable> getTablesIterator()
 	{
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
@ -30,4 +30,39 @@ public abstract class XWPFHeaderFooter {
 	protected XWPFHeaderFooter() {
 		headerFooter = CTHdrFtr.Factory.newInstance();
 	}
+	/** Returns the CTHdrFtr bean held by this header/footer (the object itself, not a copy) */
+	public CTHdrFtr _getHdrFtr() {
+		return headerFooter;
+	}
+
+	/**
+	 * Returns the paragraph(s) that hold the text of
+	 *  the header or footer, each one wrapped in a new
+	 *  XWPFParagraph. Normally there is only the one
+	 *  paragraph, but there could be more in certain cases.
+	 */
+	public XWPFParagraph[] getParagraphs() {
+		// Size the result from the number of paragraph beans
+		int count = headerFooter.getPArray().length;
+		XWPFParagraph[] wrapped = new XWPFParagraph[count];
+		
+		for(int idx = 0; idx < count; idx++) {
+			wrapped[idx] = new XWPFParagraph(headerFooter.getPArray(idx));
+		}
+		return wrapped;
+	}
+	
+	/**
+	 * Flattens the header/footer down to plain text.
+	 * The text of each paragraph is appended in turn,
+	 *  with a newline added after every paragraph
+	 *  (so after the last one too).
+	 */
+	public String getText() {
+		StringBuilder flattened = new StringBuilder();
+		for (XWPFParagraph para : getParagraphs()) {
+			flattened.append(para.getText()).append('\n');
+		}
+		return flattened.toString();
+	}
 }
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@ -16,10 +16,12 @@
 ==================================================================== */
 package org.apache.poi.xwpf.usermodel;

-import org.apache.poi.xwpf.model.XMLParagraph;
 import org.apache.poi.xwpf.XWPFDocument;
+import org.apache.poi.xwpf.model.XMLParagraph;
+import org.apache.xmlbeans.XmlCursor;
 import org.apache.xmlbeans.XmlObject;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
@ -45,16 +47,22 @@ public class XWPFParagraph extends XMLParagraph
        this.docRef = docRef; 
        CTR[] rs = paragraph.getRArray();
    
-        // Get text
+        // Get text of the paragraph
        for (int j = 0; j < rs.length; j++) {
-            // Loop over text runs
-            CTText[] texts = rs[j].getTArray();
-            for (int k = 0; k < texts.length; k++) {
-                text.append(
-                        texts[k].getStringValue()
-                );
-            }
-            
+            // Grab the text and tabs of the run, in document order
+        	XmlCursor c = rs[j].newCursor();
+        	c.selectPath( "./*" );
+        	while(c.toNextSelection()) {
+        		XmlObject o = c.getObject();
+        		if(o instanceof CTText) {
+        			text.append( ((CTText)o).getStringValue() );
+        		}
+        		if(o instanceof CTPTab) {
+        			text.append("\t");
+        		}
+        	}
+        	c.dispose(); // release the cursor once this run is done
+        	
            // Loop over pictures inside our
            //  paragraph, looking for text in them
            CTPicture[] picts = rs[j].getPictArray();
@ -80,6 +88,12 @@ public class XWPFParagraph extends XMLParagraph
        this(paragraph.getCTP());
    }
    
+    /** Reports whether the paragraph's XML node has no child nodes at all */
+    public boolean isEmpty() {
+    	boolean hasChildren = paragraph.getDomNode().hasChildNodes();
+    	return !hasChildren;
+    }
+    
    public XWPFDocument getDocRef() {
        return docRef;
    }
--- a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java
@ -0,0 +1,96 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import java.io.File;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.xwpf.XWPFDocument;
+
+import junit.framework.TestCase;
+
+/**
+ * Exercises XWPFParagraph against the paragraphs found in the
+ *  header and the body of the ThreeColHead.docx sample.
+ */
+public class TestXWPFParagraph extends TestCase {
+	/** The sample document, opened in setUp */
+	private XWPFDocument xml;
+	/** Where the sample document lives on disk */
+	private File file;
+	
+	protected void setUp() throws Exception {
+		super.setUp();
+		
+		// The test data directory is supplied as a system property
+		String dataDir = System.getProperty("HWPF.testdata.path");
+		file = new File(dataDir + File.separator + "ThreeColHead.docx");
+		assertTrue(file.exists());
+		
+		xml = new XWPFDocument(POIXMLDocument.openPackage(file.toString()));
+	}
+	
+	/**
+	 * The header's single paragraph should flatten to tab separated text
+	 */
+	public void testHeaderParagraph() throws Exception {
+		XWPFHeader header = xml.getDocumentHeader();
+		assertNotNull(header);
+		
+		XWPFParagraph[] headerParas = header.getParagraphs();
+		assertEquals(1, headerParas.length);
+		
+		XWPFParagraph only = headerParas[0];
+		int runCount = only.getCTP().getRArray().length;
+		assertEquals(5, runCount);
+		
+		String expected = "First header column!\tMid header\tRight header!";
+		assertEquals(expected, only.getText());
+	}
+	
+	/**
+	 * Check the leading paragraphs of the document body,
+	 *  where paragraphs with text alternate with empty ones
+	 */
+	public void testDocumentParagraph() throws Exception {
+		XWPFParagraph[] body = xml.getParagraphs();
+		assertEquals(10, body.length);
+		
+		// Expected text of the first five paragraphs,
+		//  with "" standing for an empty paragraph
+		String[] expected = new String[] {
+				"This is a sample word document. It has two pages. It has a three column heading, but no footer.",
+				"",
+				"HEADING TEXT",
+				"",
+				"More on page one"
+		};
+		
+		for (int i = 0; i < expected.length; i++) {
+			XWPFParagraph para = body[i];
+			String want = expected[i];
+			
+			// Emptiness is checked first, then the text itself
+			if (want.length() == 0) {
+				assertTrue(para.isEmpty());
+			} else {
+				assertFalse(para.isEmpty());
+			}
+			assertEquals(want, para.getText());
+		}
+	}
+}