From 10c9ab866bed82c2a01ce2ab2be442b09d33d829 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sun, 27 Apr 2008 16:36:51 +0000 Subject: [PATCH] Improved hyperlink and comment fetching for xwpf text extraction, based on the patch from bug #44821 git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@651979 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/xwpf/XWPFDocument.java | 113 +++++++++++++++--- .../poi/xwpf/extractor/XWPFWordExtractor.java | 76 ++++-------- .../apache/poi/xwpf/model/XMLParagraph.java | 37 ++++++ .../poi/xwpf/model/XWPFCommentsDecorator.java | 52 ++++++++ .../xwpf/model/XWPFHyperlinkDecorator.java | 69 +++++++++++ .../xwpf/model/XWPFParagraphDecorator.java | 43 +++++++ .../poi/xwpf/usermodel/XWPFComment.java | 61 ++++++++++ .../poi/xwpf/usermodel/XWPFHyperlink.java | 43 +++++++ .../poi/xwpf/usermodel/XWPFParagraph.java | 70 +++++++++++ .../poi/xwpf/usermodel/XWPFParagraphText.java | 6 + .../apache/poi/xwpf/usermodel/XWPFTable.java | 55 +++++++++ 11 files changed, 560 insertions(+), 65 deletions(-) create mode 100644 src/ooxml/java/org/apache/poi/xwpf/model/XMLParagraph.java create mode 100644 src/ooxml/java/org/apache/poi/xwpf/model/XWPFCommentsDecorator.java create mode 100644 src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java create mode 100644 src/ooxml/java/org/apache/poi/xwpf/model/XWPFParagraphDecorator.java create mode 100644 src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFComment.java create mode 100644 src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlink.java create mode 100644 src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java create mode 100644 src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraphText.java create mode 100644 src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java diff --git a/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java b/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java index 05b716d75..4338b11f2 100644 --- 
a/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java +++ b/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java @@ -17,6 +17,9 @@ package org.apache.poi.xwpf; import java.io.IOException; +import java.util.LinkedList; +import java.util.List; +import java.util.Iterator; import org.apache.poi.POIXMLDocument; import org.apache.xmlbeans.XmlException; @@ -24,12 +27,22 @@ import org.openxml4j.exceptions.InvalidFormatException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; import org.openxml4j.opc.PackagePart; +import org.openxml4j.opc.PackageRelationship; import org.openxml4j.opc.PackageRelationshipCollection; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment; import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument; import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl; + +import org.apache.poi.xwpf.usermodel.XWPFHyperlink; +import org.apache.poi.xwpf.usermodel.XWPFParagraph; +import org.apache.poi.xwpf.usermodel.XWPFComment; +import org.apache.poi.xwpf.usermodel.XWPFTable; /** * Experimental class to do low level processing @@ -48,15 +61,59 @@ public class XWPFDocument extends POIXMLDocument { public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"; public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"; public static final String STYLES_RELATION_TYPE = 
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"; - public static final String HYPERLINK_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"; + public static final String HYPERLINK_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"; + public static final String COMMENT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"; private DocumentDocument wordDoc; + protected List<XWPFComment> comments; + protected List<XWPFHyperlink> hyperlinks; + protected List<XWPFParagraph> paragraphs; + protected List<XWPFTable> tables; public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException { super(container); + + hyperlinks = new LinkedList<XWPFHyperlink>(); + comments = new LinkedList<XWPFComment>(); + paragraphs = new LinkedList<XWPFParagraph>(); + tables= new LinkedList<XWPFTable>(); wordDoc = DocumentDocument.Factory.parse(getCorePart().getInputStream()); + + // filling paragraph list + for (CTP p : getDocumentBody().getPArray()) { + paragraphs.add(new XWPFParagraph(p, this)); + } + + // Get the hyperlinks + // TODO: make me optional/separated in private function + try { + Iterator<PackageRelationship> relIter = + getCorePart().getRelationshipsByType(HYPERLINK_RELATION_TYPE).iterator(); + while(relIter.hasNext()) { + PackageRelationship rel = relIter.next(); + hyperlinks.add(new XWPFHyperlink(rel.getId(), rel.getTargetURI().toString())); + } + } catch(Exception e) { + throw new OpenXML4JException(e.getLocalizedMessage()); // NOTE(review): only the message is kept, the original cause is dropped + } + + // Get the comments, if there are any + PackageRelationshipCollection commentsRel = getCmntRelations(); + if(commentsRel != null && commentsRel.size() > 0) { + PackagePart commentsPart = getTargetPart(commentsRel.getRelationship(0)); + CommentsDocument cmntdoc = CommentsDocument.Factory.parse(commentsPart.getInputStream()); + for(CTComment ctcomment : cmntdoc.getComments().getCommentArray()) + { + comments.add(new XWPFComment(ctcomment)); + } + } + // Tables come from the document body, not the comments part, so read them even when there are no comments + for(CTTbl table : 
getDocumentBody().getTblArray()) + { + tables.add(new XWPFTable(table)); + } } /** @@ -66,6 +123,42 @@ public class XWPFDocument extends POIXMLDocument { return wordDoc.getDocument(); } + public Iterator<XWPFParagraph> getParagraphsIterator() // iterates over the body paragraphs wrapped by the constructor + { + return paragraphs.iterator(); + } + /** Iterates over the tables read from the document body by the constructor. */ + public Iterator<XWPFTable> getTablesIterator() + { + return tables.iterator(); + } + /** Returns the hyperlink whose relationship id equals the given id, or null if there is none. */ + public XWPFHyperlink getHyperlinkByID(String id) + { + Iterator<XWPFHyperlink> iter = hyperlinks.iterator(); + while(iter.hasNext()) + { + XWPFHyperlink link = iter.next(); + if(link.getId().equals(id)) + return link; + } + + return null; + } + /** Returns the comment whose id equals the given id, or null if there is none. */ + public XWPFComment getCommentByID(String id) + { + Iterator<XWPFComment> iter = comments.iterator(); + while(iter.hasNext()) + { + XWPFComment comment = iter.next(); + if(comment.getId().equals(id)) + return comment; + } + + return null; + } + /** * Returns the low level body of the document */ @@ -91,18 +184,10 @@ public class XWPFDocument extends POIXMLDocument { StylesDocument.Factory.parse(parts[0].getInputStream()); return sd.getStyles(); } - - /** - * Returns all the hyperlink relations for the file. 
- * You'll generally want to get the target to get - * the destination of the hyperlink - */ - public PackageRelationshipCollection getHyperlinks() { - try { - return getCorePart().getRelationshipsByType(HYPERLINK_RELATION_TYPE); - } catch(InvalidFormatException e) { - // Should never happen - throw new IllegalStateException(e); - } + /** Returns the core part's relationships of type COMMENT_RELATION_TYPE. */ + protected PackageRelationshipCollection getCmntRelations() throws InvalidFormatException + { + return getCorePart().getRelationshipsByType(COMMENT_RELATION_TYPE); } } + diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java index bd1936d16..8ca4f0349 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java +++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java @@ -17,19 +17,19 @@ package org.apache.poi.xwpf.extractor; import java.io.IOException; +import java.util.Iterator; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.xwpf.XWPFDocument; +import org.apache.poi.xwpf.model.XWPFCommentsDecorator; +import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator; +import org.apache.poi.xwpf.model.XWPFParagraphDecorator; +import org.apache.poi.xwpf.usermodel.XWPFParagraph; +import org.apache.poi.xwpf.usermodel.XWPFTable; import org.apache.xmlbeans.XmlException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackageRelationship; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; /** * Helper class to extract text from an OOXML Word file @@ -45,6 +45,15 @@ public class XWPFWordExtractor extends 
POIXMLTextExtractor { super(document); this.document = document; } + + /** + * Should we also fetch the hyperlinks, when fetching + * the text content? Default is to only output the + * hyperlink label, and not the contents + */ + public void setFetchHyperlinks(boolean fetch) { + fetchHyperlinks = fetch; + } public static void main(String[] args) throws Exception { if(args.length < 1) { @@ -59,56 +68,21 @@ public class XWPFWordExtractor extends POIXMLTextExtractor { System.out.println(extractor.getText()); } - /** - * Should we also fetch the hyperlinks, when fetching - * the text content? Default is to only output the - * hyperlink label, and not the contents - */ - public void setFetchHyperlinks(boolean fetch) { - fetchHyperlinks = fetch; - } - public String getText() { - CTBody body = document.getDocumentBody(); StringBuffer text = new StringBuffer(); - // Loop over paragraphs - CTP[] ps = body.getPArray(); - for (int i = 0; i < ps.length; i++) { - // Loop over ranges and hyperlinks - // TODO - properly intersperce ranges and hyperlinks - CTR[] rs = ps[i].getRArray(); - for(int j = 0; j < rs.length; j++) { - // Loop over text runs - CTText[] texts = rs[j].getTArray(); - for (int k = 0; k < texts.length; k++) { - text.append( - texts[k].getStringValue() - ); - } - } - CTHyperlink[] hls = ps[i].getHyperlinkArray(); - for(CTHyperlink hl : hls) { - for(CTR r : hl.getRArray()) { - for(CTText txt : r.getTArray()) { - text.append(txt.getStringValue()); - } - } - if(fetchHyperlinks) { - String id = hl.getId(); - if(id != null) { - PackageRelationship hlRel = - document.getHyperlinks().getRelationshipByID(id); - if(hlRel != null) { - text.append(" <" + hlRel.getTargetURI().toString() + ">"); - } - } - } - } + Iterator<XWPFParagraph> i = document.getParagraphsIterator(); // typed so that i.next() is an XWPFParagraph for the decorator constructor + while(i.hasNext()) { + XWPFParagraphDecorator decorator = new XWPFCommentsDecorator( + new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks)); + text.append(decorator.getText()+"\n"); + } - // New line after 
each paragraph. - text.append("\n"); + Iterator<XWPFTable> j = document.getTablesIterator(); // typed so that j.next().getText() resolves; table text follows all paragraph text + while(j.hasNext()) + { + text.append(j.next().getText()+"\n"); } return text.toString(); diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XMLParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/model/XMLParagraph.java new file mode 100644 index 000000000..c7d4bc261 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/model/XMLParagraph.java @@ -0,0 +1,37 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xwpf.model; + +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; + +/** + * Base class for XWPF paragraphs: a thin holder for the low level CTP bean + * The bean is stored as given (no copy is made) and handed back by getCTP() + * @author Yury Batrakov (batrakov at gmail.com) + * + */ +public class XMLParagraph { + protected CTP paragraph; + /** Wraps the given paragraph bean. */ + public XMLParagraph(CTP paragraph) { + this.paragraph = paragraph; + } + /** Returns the wrapped paragraph bean. */ + public CTP getCTP() { + return paragraph; + } +} \ No newline at end of file diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFCommentsDecorator.java b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFCommentsDecorator.java new file mode 100644 index 000000000..1abc26260 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFCommentsDecorator.java @@ -0,0 +1,52 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xwpf.model; + +import org.apache.poi.xwpf.usermodel.XWPFComment; +import org.apache.poi.xwpf.usermodel.XWPFParagraph; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTMarkupRange; + +/** + * Decorator for XWPFParagraph that appends the comments + * anchored in the paragraph to its text + * Each comment found is rendered as a tab, "Comment by ", the author, ": " and the comment text. + * @author Yury Batrakov (batrakov at gmail.com) + * NOTE(review): getDocRef() is dereferenced without a null check - confirm callers only pass paragraphs that carry a document reference. + */ +public class XWPFCommentsDecorator extends XWPFParagraphDecorator { + private StringBuffer commentText; + /** Decorates the same paragraph as nextDecorator and chains to it. */ + public XWPFCommentsDecorator(XWPFParagraphDecorator nextDecorator) { + this(nextDecorator.paragraph, nextDecorator); + } + public XWPFCommentsDecorator(XWPFParagraph paragraph, XWPFParagraphDecorator nextDecorator) { + super(paragraph, nextDecorator); + + XWPFComment comment; + commentText = new StringBuffer(); + + for(CTMarkupRange anchor : paragraph.getCTP().getCommentRangeStartArray()) + { + if((comment = paragraph.getDocRef().getCommentByID(anchor.getId().toString())) != null) + commentText.append("\tComment by " + comment.getAuthor()+": "+comment.getText()); + } + } + /** Returns super.getText() followed by the comment text collected in the constructor. */ + public String getText() { + return super.getText() + commentText; + } +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java new file mode 100644 index 000000000..ab5784bf7 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java @@ -0,0 +1,69 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xwpf.model; + +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; +import org.apache.poi.xwpf.usermodel.XWPFParagraph; + +/** + * Decorator for XWPFParagraph that appends the text of the hyperlinks + * found in the paragraph to its text. + * When URLs are requested, each known link target follows its link text as " <url>". + * TODO - add the hyperlink text in the right place, and not just + * at the end + */ +public class XWPFHyperlinkDecorator extends XWPFParagraphDecorator { + private StringBuffer hyperlinkText; + + /** + * @param nextDecorator The next decorator to use + * @param outputHyperlinkUrls Should we output the links too, or just the link text? + */ + public XWPFHyperlinkDecorator(XWPFParagraphDecorator nextDecorator, boolean outputHyperlinkUrls) { + this(nextDecorator.paragraph, nextDecorator, outputHyperlinkUrls); + } + + /** + * @param prgrph The paragraph of text to work on (nextDecorator is the next decorator to use, may be null) + * @param outputHyperlinkUrls Should we output the links too, or just the link text? 
+ */ + public XWPFHyperlinkDecorator(XWPFParagraph prgrph, XWPFParagraphDecorator nextDecorator, boolean outputHyperlinkUrls) { + super(prgrph, nextDecorator); + + hyperlinkText = new StringBuffer(); + + // loop over hyperlink anchors + for(CTHyperlink link : paragraph.getCTP().getHyperlinkArray()){ + for (CTR r : link.getRArray()) { + // Loop over text runs + for (CTText text : r.getTArray()){ + hyperlinkText.append(text.getStringValue()); + } + } + if(outputHyperlinkUrls && paragraph.getDocRef().getHyperlinkByID(link.getId()) != null) { + hyperlinkText.append(" <"+paragraph.getDocRef().getHyperlinkByID(link.getId()).getURL()+">"); + } + } + } + /** Returns super.getText() followed by the hyperlink text collected in the constructor. */ + public String getText() + { + return super.getText() + hyperlinkText; + } +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFParagraphDecorator.java b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFParagraphDecorator.java new file mode 100644 index 000000000..3946b22b8 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFParagraphDecorator.java @@ -0,0 +1,43 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xwpf.model; + +import org.apache.poi.xwpf.usermodel.XWPFParagraph; + +/** + * Base decorator class for XWPFParagraph; getText() delegates to the next decorator when there is one, otherwise to the paragraph + */ +public abstract class XWPFParagraphDecorator { + protected XWPFParagraph paragraph; + protected XWPFParagraphDecorator nextDecorator; + /** Creates a decorator with no next decorator. */ + public XWPFParagraphDecorator(XWPFParagraph paragraph) { + this(paragraph, null); + } + /** Creates a decorator for the paragraph that chains to nextDecorator (may be null). */ + public XWPFParagraphDecorator(XWPFParagraph paragraph, XWPFParagraphDecorator nextDecorator) { + this.paragraph = paragraph; + this.nextDecorator = nextDecorator; + } + /** Returns the next decorator's text, or the paragraph's own text when there is no next decorator. */ + public String getText() { + if(nextDecorator != null) { + return nextDecorator.getText(); + } + return paragraph.getText(); + } +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFComment.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFComment.java new file mode 100644 index 000000000..7de86d400 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFComment.java @@ -0,0 +1,61 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xwpf.usermodel; + +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; + +/** + * Sketch of XWPF comment class + * Holds a comment's id, its author and the text of its paragraphs, concatenated with no separator +* @author Yury Batrakov (batrakov at gmail.com) + * + */ +public class XWPFComment +{ + protected String id; + protected String author; + protected StringBuffer text; + /** Reads id, author and paragraph text from the given comment bean. */ + public XWPFComment(CTComment comment) + { + text = new StringBuffer(); + id = comment.getId().toString(); + author = comment.getAuthor(); + + for(CTP ctp : comment.getPArray()) + { + XWPFParagraph p = new XWPFParagraph(ctp); + text.append(p.getText()); + } + } + /** Returns the comment id as a string. */ + public String getId() + { + return id; + } + /** Returns the author read from the comment bean. */ + public String getAuthor() + { + return author; + } + /** Returns the concatenated text of the comment's paragraphs. */ + public String getText() + { + return text.toString(); + } +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlink.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlink.java new file mode 100644 index 000000000..f9f2f9d32 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHyperlink.java @@ -0,0 +1,43 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xwpf.usermodel; + +/** + * Sketch of XWPF hyperlink class + * Plain holder pairing an id with a URL string +* @author Yury Batrakov (batrakov at gmail.com) + * + */ +public class XWPFHyperlink +{ + String id, url; + public XWPFHyperlink(String id, String url) + { + this.id = id; + this.url = url; + } + /** Returns the id given to the constructor. */ + public String getId() + { + return id; + } + /** Returns the URL given to the constructor. */ + public String getURL() + { + return url; + } +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java new file mode 100644 index 000000000..57527cac0 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -0,0 +1,70 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xwpf.usermodel; + +import org.apache.poi.xwpf.model.XMLParagraph; +import org.apache.poi.xwpf.XWPFDocument; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; + +/** + * Sketch of XWPF paragraph class; the text of the paragraph's direct runs is collected once, in the constructor + */ +public class XWPFParagraph extends XMLParagraph +{ + protected XWPFDocument docRef; // XXX: back-reference giving access to the document's hyperlinks, comments and tables; null when built without a document + /** + * TODO - replace with RichText String + */ + private StringBuffer text = new StringBuffer(); + /** Wraps the paragraph bean, remembers the owning document (may be null) and collects the run text. */ + public XWPFParagraph(CTP prgrph, XWPFDocument docRef) + { + super(prgrph); + + this.docRef = docRef; + CTR[] rs = paragraph.getRArray(); + + // Get text + for (int j = 0; j < rs.length; j++) { + // Loop over text runs + CTText[] texts = rs[j].getTArray(); + for (int k = 0; k < texts.length; k++) { + text.append( + texts[k].getStringValue() + ); + } + } + } + /** Wraps the paragraph bean without a document reference. */ + public XWPFParagraph(CTP prgrph) { + this(prgrph, null); + } + /** Wraps the bean held by another paragraph object, without a document reference. */ + public XWPFParagraph(XMLParagraph paragraph) { + this(paragraph.getCTP()); + } + /** Returns the document passed to the constructor, or null if none was given. */ + public XWPFDocument getDocRef() { + return docRef; + } + /** Returns the run text collected by the constructor. */ + public String getText() { + return text.toString(); + } +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraphText.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraphText.java new file mode 100644 index 000000000..a25b16467 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraphText.java @@ -0,0 +1,6 @@ +package org.apache.poi.xwpf.usermodel; + +public class XWPFParagraphText +{ + +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java new file mode 100644 index 000000000..3f69f4173 --- /dev/null +++ 
b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java @@ -0,0 +1,55 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xwpf.usermodel; + +import org.apache.poi.xwpf.usermodel.XWPFParagraph; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; + +/** + * Sketch of XWPFTable class. Only the table's text is held. + * Every cell paragraph's text is followed by a tab and every row is ended with a newline. + * @author Yury Batrakov (batrakov at gmail.com) + * + */ +public class XWPFTable +{ + protected StringBuffer text=new StringBuffer(); + /** Collects the text of all paragraphs in all cells of all rows of the given table. */ + public XWPFTable(CTTbl table) + { + for(CTRow row : table.getTrArray()) + { + for(CTTc cell : row.getTcArray()) + { + for(CTP ctp : cell.getPArray()) + { + XWPFParagraph p = new XWPFParagraph(ctp); + this.text.append(p.getText()+"\t"); + } + } + this.text.append("\n"); + } + } + /** Returns the text collected by the constructor. */ + public String getText() + { + return text.toString(); + } +}