Patch from Tim Allison from bug #54849 - Controlled content/Form (Std/StdBlock) content
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1494376 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
426a6f3451
commit
58b92caebe
@ -18,6 +18,7 @@ package org.apache.poi.xwpf.extractor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.POIXMLException;
|
||||
@ -26,13 +27,18 @@ import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
|
||||
import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
|
||||
import org.apache.poi.xwpf.usermodel.IBodyElement;
|
||||
import org.apache.poi.xwpf.usermodel.IRunElement;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFRun;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFSDT;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
|
||||
|
||||
@ -86,59 +92,11 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||
// Start out with all headers
|
||||
extractHeaders(text, hfPolicy);
|
||||
|
||||
// First up, all our paragraph based text
|
||||
Iterator<XWPFParagraph> i = document.getParagraphsIterator();
|
||||
while(i.hasNext()) {
|
||||
XWPFParagraph paragraph = i.next();
|
||||
|
||||
try {
|
||||
CTSectPr ctSectPr = null;
|
||||
if (paragraph.getCTP().getPPr()!=null) {
|
||||
ctSectPr = paragraph.getCTP().getPPr().getSectPr();
|
||||
}
|
||||
|
||||
XWPFHeaderFooterPolicy headerFooterPolicy = null;
|
||||
|
||||
if (ctSectPr!=null) {
|
||||
headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr);
|
||||
extractHeaders(text, headerFooterPolicy);
|
||||
}
|
||||
|
||||
// Do the paragraph text
|
||||
for(XWPFRun run : paragraph.getRuns()) {
|
||||
text.append(run.toString());
|
||||
if(run instanceof XWPFHyperlinkRun && fetchHyperlinks) {
|
||||
XWPFHyperlink link = ((XWPFHyperlinkRun)run).getHyperlink(document);
|
||||
if(link != null)
|
||||
text.append(" <" + link.getURL() + ">");
|
||||
}
|
||||
}
|
||||
|
||||
// Add comments
|
||||
XWPFCommentsDecorator decorator = new XWPFCommentsDecorator(paragraph, null);
|
||||
text.append(decorator.getCommentText()).append('\n');
|
||||
|
||||
// Do endnotes and footnotes
|
||||
String footnameText = paragraph.getFootnoteText();
|
||||
if(footnameText != null && footnameText.length() > 0) {
|
||||
text.append(footnameText + "\n");
|
||||
}
|
||||
|
||||
if (ctSectPr!=null) {
|
||||
extractFooters(text, headerFooterPolicy);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new POIXMLException(e);
|
||||
} catch (XmlException e) {
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
}
|
||||
|
||||
// Then our table based text
|
||||
Iterator<XWPFTable> j = document.getTablesIterator();
|
||||
while(j.hasNext()) {
|
||||
text.append(j.next().getText()).append('\n');
|
||||
}
|
||||
// body elements
|
||||
for (IBodyElement e : document.getBodyElements()){
|
||||
appendBodyElementText(text, e);
|
||||
text.append('\n');
|
||||
}
|
||||
|
||||
// Finish up with all the footers
|
||||
extractFooters(text, hfPolicy);
|
||||
@ -146,6 +104,79 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
public void appendBodyElementText(StringBuffer text, IBodyElement e){
|
||||
if (e instanceof XWPFParagraph){
|
||||
appendParagraphText(text, (XWPFParagraph)e);
|
||||
} else if (e instanceof XWPFTable){
|
||||
appendTableText(text, (XWPFTable)e);
|
||||
} else if (e instanceof XWPFSDT){
|
||||
text.append(((XWPFSDT)e).getContent().getText());
|
||||
}
|
||||
}
|
||||
|
||||
public void appendParagraphText(StringBuffer text, XWPFParagraph paragraph){
|
||||
try {
|
||||
CTSectPr ctSectPr = null;
|
||||
if (paragraph.getCTP().getPPr()!=null) {
|
||||
ctSectPr = paragraph.getCTP().getPPr().getSectPr();
|
||||
}
|
||||
|
||||
XWPFHeaderFooterPolicy headerFooterPolicy = null;
|
||||
|
||||
if (ctSectPr!=null) {
|
||||
headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr);
|
||||
extractHeaders(text, headerFooterPolicy);
|
||||
}
|
||||
|
||||
|
||||
for(IRunElement run : paragraph.getRuns()) {
|
||||
text.append(run.toString());
|
||||
if(run instanceof XWPFHyperlinkRun && fetchHyperlinks) {
|
||||
XWPFHyperlink link = ((XWPFHyperlinkRun)run).getHyperlink(document);
|
||||
if(link != null)
|
||||
text.append(" <" + link.getURL() + ">");
|
||||
}
|
||||
}
|
||||
|
||||
// Add comments
|
||||
XWPFCommentsDecorator decorator = new XWPFCommentsDecorator(paragraph, null);
|
||||
String commentText = decorator.getCommentText();
|
||||
if (commentText.length() > 0){
|
||||
text.append(commentText).append('\n');
|
||||
}
|
||||
|
||||
// Do endnotes and footnotes
|
||||
String footnameText = paragraph.getFootnoteText();
|
||||
if(footnameText != null && footnameText.length() > 0) {
|
||||
text.append(footnameText + '\n');
|
||||
}
|
||||
|
||||
if (ctSectPr!=null) {
|
||||
extractFooters(text, headerFooterPolicy);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new POIXMLException(e);
|
||||
} catch (XmlException e) {
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void appendTableText(StringBuffer text, XWPFTable table){
|
||||
//this works recursively to pull embedded tables from tables
|
||||
for (XWPFTableRow row : table.getRows()){
|
||||
List<XWPFTableCell> cells = row.getTableCells();
|
||||
for (int i = 0; i < cells.size(); i++){
|
||||
XWPFTableCell cell = cells.get(i);
|
||||
text.append(cell.getTextRecursively());
|
||||
if (i < cells.size()-1){
|
||||
text.append("\t");
|
||||
}
|
||||
}
|
||||
text.append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
|
||||
if(hfPolicy.getFirstPageFooter() != null) {
|
||||
text.append( hfPolicy.getFirstPageFooter().getText() );
|
||||
|
@ -28,7 +28,8 @@ package org.apache.poi.xwpf.usermodel;
|
||||
*
|
||||
*/
|
||||
public enum BodyElementType {
|
||||
PARAGRAPH,
|
||||
CONTENTCONTROL,
|
||||
PARAGRAPH,
|
||||
TABLE,
|
||||
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ package org.apache.poi.xwpf.usermodel;
|
||||
* The different kinds of {@link IBody} that exist
|
||||
*/
|
||||
public enum BodyType {
|
||||
CONTENTCONTROL,
|
||||
DOCUMENT,
|
||||
HEADER,
|
||||
FOOTER,
|
||||
|
28
src/ooxml/java/org/apache/poi/xwpf/usermodel/IRunBody.java
Normal file
28
src/ooxml/java/org/apache/poi/xwpf/usermodel/IRunBody.java
Normal file
@ -0,0 +1,28 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import org.apache.poi.POIXMLDocumentPart;
|
||||
|
||||
/**
|
||||
* Simple interface describing both {@link XWPFParagraph}
|
||||
* and {@link XWPFSDT}
|
||||
*/
|
||||
public interface IRunBody {
|
||||
public XWPFDocument getDocument();
|
||||
public POIXMLDocumentPart getPart();
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
/**
|
||||
* Common interface for things that can occur
|
||||
* where a run (text with common stylings) can,
|
||||
* eg {@link XWPFRun} or {@link XWPFSDT}.
|
||||
* More methods to follow shortly!
|
||||
*/
|
||||
public interface IRunElement {
// Marker interface for now: it declares no methods.
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
/**
|
||||
* Interface for anything that can be within an SDT:
|
||||
* {@link XWPFRun}, {@link XWPFTable}, {@link XWPFParagraph},
|
||||
* {@link XWPFSDT} etc
|
||||
*/
|
||||
public interface ISDTContents {
// Marker interface: it declares no methods.
|
||||
}
|
@ -98,6 +98,7 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
|
||||
protected List<XWPFHyperlink> hyperlinks = new ArrayList<XWPFHyperlink>();
|
||||
protected List<XWPFParagraph> paragraphs = new ArrayList<XWPFParagraph>();
|
||||
protected List<XWPFTable> tables = new ArrayList<XWPFTable>();
|
||||
protected List<XWPFSDT> contentControls = new ArrayList<XWPFSDT>();
|
||||
protected List<IBodyElement> bodyElements = new ArrayList<IBodyElement>();
|
||||
protected List<XWPFPictureData> pictures = new ArrayList<XWPFPictureData>();
|
||||
protected Map<Long, List<XWPFPictureData>> packagePictures = new HashMap<Long, List<XWPFPictureData>>();
|
||||
@ -150,7 +151,11 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
|
||||
XWPFTable t = new XWPFTable((CTTbl) o, this);
|
||||
bodyElements.add(t);
|
||||
tables.add(t);
|
||||
}
|
||||
} else if (o instanceof CTSdtBlock){
|
||||
XWPFSDT c = new XWPFSDT((CTSdtBlock)o, this);
|
||||
bodyElements.add(c);
|
||||
contentControls.add(c);
|
||||
}
|
||||
}
|
||||
cursor.dispose();
|
||||
|
||||
@ -230,10 +235,10 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
|
||||
for(POIXMLDocumentPart p : getRelations()){
|
||||
String relation = p.getPackageRelationship().getRelationshipType();
|
||||
if (relation.equals(XWPFRelation.FOOTNOTE.getRelation())) {
|
||||
FootnotesDocument footnotesDocument = FootnotesDocument.Factory.parse(p.getPackagePart().getInputStream());
|
||||
this.footnotes = (XWPFFootnotes)p;
|
||||
this.footnotes.onDocumentRead();
|
||||
|
||||
// Warning - this apparently doubles footnotes - see bug #????
|
||||
FootnotesDocument footnotesDocument = FootnotesDocument.Factory.parse(p.getPackagePart().getInputStream());
|
||||
for(CTFtnEdn ctFtnEdn : footnotesDocument.getFootnotes().getFootnoteList()) {
|
||||
footnotes.addFootnote(ctFtnEdn);
|
||||
}
|
||||
|
@ -34,6 +34,7 @@ import org.apache.xmlbeans.XmlOptions;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNumbering;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.FtrDocument;
|
||||
|
||||
@ -61,6 +62,7 @@ public class XWPFFooter extends XWPFHeaderFooter {
|
||||
tables.add(t);
|
||||
bodyElements.add(t);
|
||||
}
|
||||
|
||||
}
|
||||
cursor.dispose();
|
||||
}
|
||||
@ -118,6 +120,10 @@ public class XWPFFooter extends XWPFHeaderFooter {
|
||||
tables.add(t);
|
||||
bodyElements.add(t);
|
||||
}
|
||||
if (o instanceof CTSdtBlock){
|
||||
XWPFSDT c = new XWPFSDT((CTSdtBlock)o, this);
|
||||
bodyElements.add(c);
|
||||
}
|
||||
}
|
||||
cursor.dispose();
|
||||
} catch (Exception e) {
|
||||
|
@ -26,6 +26,7 @@ import org.apache.xmlbeans.XmlObject;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdn;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
|
||||
|
||||
@ -37,21 +38,45 @@ public class XWPFFootnote implements Iterable<XWPFParagraph>,IBody {
|
||||
|
||||
private CTFtnEdn ctFtnEdn;
|
||||
private XWPFFootnotes footnotes;
|
||||
private XWPFDocument document;
|
||||
|
||||
public XWPFFootnote(CTFtnEdn note, XWPFFootnotes xFootnotes) {
|
||||
footnotes = xFootnotes;
|
||||
ctFtnEdn = note;
|
||||
for (CTP p : ctFtnEdn.getPList()) {
|
||||
paragraphs.add(new XWPFParagraph(p, this));
|
||||
}
|
||||
document = xFootnotes.getXWPFDocument();
|
||||
init();
|
||||
}
|
||||
|
||||
public XWPFFootnote(XWPFDocument document, CTFtnEdn body) {
|
||||
for (CTP p : body.getPList()) {
|
||||
paragraphs.add(new XWPFParagraph(p, document));
|
||||
}
|
||||
ctFtnEdn = body;
|
||||
this.document = document;
|
||||
init();
|
||||
}
|
||||
|
||||
private void init(){
|
||||
XmlCursor cursor = ctFtnEdn.newCursor();
|
||||
//copied from XWPFDocument...should centralize this code
|
||||
//to avoid duplication
|
||||
cursor.selectPath("./*");
|
||||
while (cursor.toNextSelection()) {
|
||||
XmlObject o = cursor.getObject();
|
||||
if (o instanceof CTP) {
|
||||
XWPFParagraph p = new XWPFParagraph((CTP) o, this);
|
||||
bodyElements.add(p);
|
||||
paragraphs.add(p);
|
||||
} else if (o instanceof CTTbl) {
|
||||
XWPFTable t = new XWPFTable((CTTbl) o, this);
|
||||
bodyElements.add(t);
|
||||
tables.add(t);
|
||||
} else if (o instanceof CTSdtBlock){
|
||||
XWPFSDT c = new XWPFSDT((CTSdtBlock)o, this);
|
||||
bodyElements.add(c);
|
||||
}
|
||||
|
||||
}
|
||||
cursor.dispose();
|
||||
}
|
||||
|
||||
// Returns the paragraphs collected for this footnote.
// The paragraphs field itself is handed back, not a copy.
public List<XWPFParagraph> getParagraphs() {
|
||||
return paragraphs;
|
||||
}
|
||||
@ -314,7 +339,7 @@ public class XWPFFootnote implements Iterable<XWPFParagraph>,IBody {
|
||||
* @see org.apache.poi.xwpf.usermodel.IBody#getXWPFDocument()
|
||||
*/
|
||||
public XWPFDocument getXWPFDocument() {
|
||||
return footnotes.getXWPFDocument();
|
||||
return document;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -35,6 +35,7 @@ import org.apache.xmlbeans.XmlOptions;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNumbering;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.HdrDocument;
|
||||
|
||||
@ -121,6 +122,10 @@ public class XWPFHeader extends XWPFHeaderFooter {
|
||||
tables.add(t);
|
||||
bodyElements.add(t);
|
||||
}
|
||||
if (o instanceof CTSdtBlock){
|
||||
XWPFSDT c = new XWPFSDT((CTSdtBlock)o, this);
|
||||
bodyElements.add(c);
|
||||
}
|
||||
}
|
||||
cursor.dispose();
|
||||
} catch (XmlException e) {
|
||||
|
@ -129,7 +129,7 @@ public abstract class XWPFHeaderFooter extends POIXMLDocumentPart implements IBo
|
||||
*/
|
||||
public String getText() {
|
||||
StringBuffer t = new StringBuffer();
|
||||
|
||||
//TODO: simplify this to get ibody elements in order
|
||||
for(int i=0; i<paragraphs.size(); i++) {
|
||||
if(! paragraphs.get(i).isEmpty()) {
|
||||
String text = paragraphs.get(i).getText();
|
||||
@ -149,6 +149,11 @@ public abstract class XWPFHeaderFooter extends POIXMLDocumentPart implements IBo
|
||||
}
|
||||
}
|
||||
|
||||
for (IBodyElement bodyElement : getBodyElements()){
|
||||
if (bodyElement instanceof XWPFSDT){
|
||||
t.append(((XWPFSDT) bodyElement).getContent().getText()+'\n');
|
||||
}
|
||||
}
|
||||
return t.toString();
|
||||
}
|
||||
|
||||
|
@ -27,7 +27,7 @@ public class XWPFHyperlinkRun extends XWPFRun
|
||||
{
|
||||
private CTHyperlink hyperlink;
|
||||
|
||||
public XWPFHyperlinkRun(CTHyperlink hyperlink, CTR run, XWPFParagraph p) {
|
||||
public XWPFHyperlinkRun(CTHyperlink hyperlink, CTR run, IRunBody p) {
|
||||
super(run, p);
|
||||
this.hyperlink = hyperlink;
|
||||
}
|
||||
|
@ -40,6 +40,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTProofErr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
|
||||
@ -61,12 +62,13 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTextAlignment;
|
||||
* actual text (possibly along with more styling) is held on
|
||||
* the child {@link XWPFRun}s.</p>
|
||||
*/
|
||||
public class XWPFParagraph implements IBodyElement {
|
||||
public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents {
|
||||
private final CTP paragraph;
|
||||
protected IBody part;
|
||||
/** For access to the document's hyperlink, comments, tables etc */
|
||||
protected XWPFDocument document;
|
||||
protected List<XWPFRun> runs;
|
||||
protected List<IRunElement> iruns;
|
||||
|
||||
private StringBuffer footnoteText = new StringBuffer();
|
||||
|
||||
@ -82,6 +84,7 @@ public class XWPFParagraph implements IBodyElement {
|
||||
|
||||
// Build up the character runs
|
||||
runs = new ArrayList<XWPFRun>();
|
||||
iruns = new ArrayList<IRunElement>();
|
||||
buildRunsInOrderFromXml(paragraph);
|
||||
|
||||
// Look for bits associated with the runs
|
||||
@ -96,7 +99,7 @@ public class XWPFParagraph implements IBodyElement {
|
||||
XmlObject o = c.getObject();
|
||||
if(o instanceof CTFtnEdnRef) {
|
||||
CTFtnEdnRef ftn = (CTFtnEdnRef)o;
|
||||
footnoteText.append("[").append(ftn.getId()).append(": ");
|
||||
footnoteText.append(" [").append(ftn.getId()).append(": ");
|
||||
XWPFFootnote footnote =
|
||||
ftn.getDomNode().getLocalName().equals("footnoteReference") ?
|
||||
document.getFootnoteByID(ftn.getId().intValue()) :
|
||||
@ -111,7 +114,7 @@ public class XWPFParagraph implements IBodyElement {
|
||||
footnoteText.append(p.getText());
|
||||
}
|
||||
|
||||
footnoteText.append("]");
|
||||
footnoteText.append("] ");
|
||||
}
|
||||
}
|
||||
c.dispose();
|
||||
@ -129,30 +132,40 @@ public class XWPFParagraph implements IBodyElement {
|
||||
while (c.toNextSelection()) {
|
||||
XmlObject o = c.getObject();
|
||||
if (o instanceof CTR) {
|
||||
runs.add(new XWPFRun((CTR) o, this));
|
||||
}
|
||||
if (o instanceof CTHyperlink) {
|
||||
CTHyperlink link = (CTHyperlink) o;
|
||||
for (CTR r : link.getRList()) {
|
||||
runs.add(new XWPFHyperlinkRun(link, r, this));
|
||||
}
|
||||
}
|
||||
if (o instanceof CTSdtRun) {
|
||||
CTSdtContentRun run = ((CTSdtRun) o).getSdtContent();
|
||||
for (CTR r : run.getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
if (o instanceof CTRunTrackChange) {
|
||||
for (CTR r : ((CTRunTrackChange) o).getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
if (o instanceof CTSimpleField) {
|
||||
for (CTR r : ((CTSimpleField) o).getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
XWPFRun r = new XWPFRun((CTR) o, this);
|
||||
runs.add(r);
|
||||
iruns.add(r);
|
||||
}
|
||||
if (o instanceof CTHyperlink) {
|
||||
CTHyperlink link = (CTHyperlink) o;
|
||||
for (CTR r : link.getRList()) {
|
||||
XWPFHyperlinkRun hr = new XWPFHyperlinkRun(link, r, this);
|
||||
runs.add(hr);
|
||||
iruns.add(hr);
|
||||
}
|
||||
}
|
||||
if (o instanceof CTSdtBlock) {
|
||||
XWPFSDT cc = new XWPFSDT((CTSdtBlock) o, part);
|
||||
iruns.add(cc);
|
||||
}
|
||||
if (o instanceof CTSdtRun) {
|
||||
XWPFSDT cc = new XWPFSDT((CTSdtRun) o, part);
|
||||
iruns.add(cc);
|
||||
}
|
||||
if (o instanceof CTRunTrackChange) {
|
||||
for (CTR r : ((CTRunTrackChange) o).getRList()) {
|
||||
XWPFRun cr = new XWPFRun(r, this);
|
||||
runs.add(cr);
|
||||
iruns.add(cr);
|
||||
}
|
||||
}
|
||||
if (o instanceof CTSimpleField) {
|
||||
for (CTR r : ((CTSimpleField) o).getRList()) {
|
||||
XWPFRun cr = new XWPFRun(r, this);
|
||||
runs.add(cr);
|
||||
iruns.add(cr);
|
||||
}
|
||||
}
|
||||
if (o instanceof CTSmartTagRun) {
|
||||
// Smart Tags can be nested many times.
|
||||
// This implementation does not preserve the tagging information
|
||||
@ -171,6 +184,14 @@ public class XWPFParagraph implements IBodyElement {
|
||||
return Collections.unmodifiableList(runs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return literal runs and sdt/content control objects,
* wrapped in an unmodifiable list.
|
||||
* @return unmodifiable list of {@link IRunElement}s
|
||||
*/
|
||||
public List<IRunElement> getIRuns() {
|
||||
return Collections.unmodifiableList(iruns);
|
||||
}
|
||||
|
||||
// A paragraph counts as empty when the DOM node of its underlying
// bean has no child nodes at all.
public boolean isEmpty(){
|
||||
return !paragraph.getDomNode().hasChildNodes();
|
||||
}
|
||||
@ -181,12 +202,16 @@ public class XWPFParagraph implements IBodyElement {
|
||||
|
||||
/**
|
||||
* Return the textual content of the paragraph, including text from pictures
|
||||
* in it.
|
||||
* and sdt elements in it.
|
||||
*/
|
||||
public String getText() {
|
||||
StringBuffer out = new StringBuffer();
|
||||
for(XWPFRun run : runs) {
|
||||
out.append(run.toString());
|
||||
for (IRunElement run : iruns) {
|
||||
if (run instanceof XWPFSDT){
|
||||
out.append(((XWPFSDT)run).getContent().getText());
|
||||
} else {
|
||||
out.append(run.toString());
|
||||
}
|
||||
}
|
||||
out.append(footnoteText);
|
||||
return out.toString();
|
||||
|
@ -67,7 +67,7 @@ public class XWPFPicture {
|
||||
}
|
||||
|
||||
String blipId = blipProps.getBlip().getEmbed();
|
||||
POIXMLDocumentPart part = run.getParagraph().getPart();
|
||||
POIXMLDocumentPart part = run.getParent().getPart();
|
||||
if (part != null)
|
||||
{
|
||||
POIXMLDocumentPart relatedPart = part.getRelationById(blipId);
|
||||
|
@ -74,24 +74,20 @@ import org.openxmlformats.schemas.drawingml.x2006.picture.CTPictureNonVisual;
|
||||
|
||||
/**
|
||||
* XWPFRun object defines a region of text with a common set of properties
|
||||
*
|
||||
* @author Yegor Kozlov
|
||||
* @author Gregg Morris (gregg dot morris at gmail dot com) - added getColor(), setColor()
|
||||
*
|
||||
*/
|
||||
public class XWPFRun {
|
||||
public class XWPFRun implements ISDTContents, IRunElement{
|
||||
private CTR run;
|
||||
private String pictureText;
|
||||
private XWPFParagraph paragraph;
|
||||
private IRunBody parent;
|
||||
private List<XWPFPicture> pictures;
|
||||
|
||||
/**
|
||||
* @param r the CTR bean which holds the run attributes
|
||||
* @param p the parent paragraph
|
||||
*/
|
||||
public XWPFRun(CTR r, XWPFParagraph p) {
|
||||
public XWPFRun(CTR r, IRunBody p) {
|
||||
this.run = r;
|
||||
this.paragraph = p;
|
||||
this.parent = p;
|
||||
|
||||
/**
|
||||
* reserve already occupied drawing ids, so reserving new ids later will
|
||||
@ -143,6 +139,12 @@ public class XWPFRun {
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Paragraph-typed constructor; it only delegates to the
* {@link IRunBody} based constructor.
* @param r the CTR bean which holds the run attributes
* @param p the parent paragraph
* @deprecated Use {@link XWPFRun#XWPFRun(CTR, IRunBody)}
|
||||
*/
|
||||
public XWPFRun(CTR r, XWPFParagraph p) {
|
||||
this(r, (IRunBody)p);
|
||||
}
|
||||
|
||||
private List<CTPicture> getCTPictures(XmlObject o) {
|
||||
List<CTPicture> pictures = new ArrayList<CTPicture>();
|
||||
@ -173,11 +175,20 @@ public class XWPFRun {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the currenty referenced paragraph object
|
||||
* @return current paragraph
|
||||
* Get the currently referenced paragraph/SDT object
|
||||
* @return current parent
|
||||
*/
|
||||
public IRunBody getParent() {
|
||||
// the IRunBody this run was constructed with
return parent;
|
||||
}
|
||||
/**
|
||||
* Get the currently referenced paragraph, or null if a SDT object
|
||||
* @deprecated use {@link XWPFRun#getParent()} instead
|
||||
*/
|
||||
public XWPFParagraph getParagraph() {
|
||||
return paragraph;
|
||||
if (parent instanceof XWPFParagraph)
|
||||
return (XWPFParagraph)parent;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -185,8 +196,8 @@ public class XWPFRun {
|
||||
* <code>null</code> if parent structure (paragraph > document) is not properly set.
|
||||
*/
|
||||
public XWPFDocument getDocument() {
|
||||
if (paragraph != null) {
|
||||
return paragraph.getDocument();
|
||||
if (parent != null) {
|
||||
return parent.getDocument();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@ -663,7 +674,7 @@ public class XWPFRun {
|
||||
*/
|
||||
public XWPFPicture addPicture(InputStream pictureData, int pictureType, String filename, int width, int height)
|
||||
throws InvalidFormatException, IOException {
|
||||
XWPFDocument doc = paragraph.document;
|
||||
XWPFDocument doc = parent.getDocument();
|
||||
|
||||
// Add the picture + relationship
|
||||
String relationId = doc.addPictureData(pictureData, pictureType);
|
||||
@ -691,7 +702,7 @@ public class XWPFRun {
|
||||
inline.setDistL(0);
|
||||
|
||||
CTNonVisualDrawingProps docPr = inline.addNewDocPr();
|
||||
long id = getParagraph().document.getDrawingIdManager().reserveNew();
|
||||
long id = getParent().getDocument().getDrawingIdManager().reserveNew();
|
||||
docPr.setId(id);
|
||||
/* This name is not visible in Word 2010 anywhere. */
|
||||
docPr.setName("Drawing " + id);
|
||||
|
110
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFSDT.java
Normal file
110
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFSDT.java
Normal file
@ -0,0 +1,110 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.POIXMLDocumentPart;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtPr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
|
||||
|
||||
/**
|
||||
* Experimental class to offer rudimentary read-only processing of
|
||||
* StructuredDocumentTags/ContentControl
|
||||
*
|
||||
*
|
||||
*
|
||||
* WARNING - APIs expected to change rapidly
|
||||
*
|
||||
*/
|
||||
public class XWPFSDT implements IBodyElement, IRunBody, ISDTContents, IRunElement {
|
||||
private final String title;
|
||||
private final String tag;
|
||||
private final XWPFSDTContent content;
|
||||
private final IBody part;
|
||||
|
||||
public XWPFSDT(CTSdtRun sdtRun, IBody part){
|
||||
this.part = part;
|
||||
this.content = new XWPFSDTContent(sdtRun.getSdtContent(), part, this);
|
||||
CTSdtPr pr = sdtRun.getSdtPr();
|
||||
List<CTString> aliases = pr.getAliasList();
|
||||
if (aliases != null && aliases.size() > 0){
|
||||
title = aliases.get(0).getVal();
|
||||
} else {
|
||||
title = "";
|
||||
}
|
||||
@SuppressWarnings("deprecation")
|
||||
CTString[] array = pr.getTagArray();
|
||||
if (array != null && array.length > 0){
|
||||
tag = array[0].getVal();
|
||||
} else {
|
||||
tag = "";
|
||||
}
|
||||
|
||||
}
|
||||
public XWPFSDT(CTSdtBlock block, IBody part){
|
||||
this.part = part;
|
||||
this.content = new XWPFSDTContent( block.getSdtContent(), part, this);
|
||||
CTSdtPr pr = block.getSdtPr();
|
||||
List<CTString> aliases = pr.getAliasList();
|
||||
if (aliases != null && aliases.size() > 0){
|
||||
title = aliases.get(0).getVal();
|
||||
} else {
|
||||
title = "";
|
||||
}
|
||||
@SuppressWarnings("deprecation")
|
||||
CTString[] array = pr.getTagArray();
|
||||
if (array != null && array.length > 0){
|
||||
tag = array[0].getVal();
|
||||
} else {
|
||||
tag = "";
|
||||
}
|
||||
|
||||
}
|
||||
public String getTitle(){
|
||||
return title;
|
||||
}
|
||||
public String getTag(){
|
||||
return tag;
|
||||
}
|
||||
public XWPFSDTContent getContent(){
|
||||
return content;
|
||||
}
|
||||
|
||||
public IBody getBody() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
public POIXMLDocumentPart getPart() {
|
||||
return part.getPart();
|
||||
}
|
||||
|
||||
public BodyType getPartType() {
|
||||
return BodyType.CONTENTCONTROL;
|
||||
}
|
||||
|
||||
public BodyElementType getElementType() {
|
||||
return BodyElementType.CONTENTCONTROL;
|
||||
}
|
||||
|
||||
public XWPFDocument getDocument() {
|
||||
return part.getXWPFDocument();
|
||||
}
|
||||
}
|
107
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFSDTContent.java
Normal file
107
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFSDTContent.java
Normal file
@ -0,0 +1,107 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
import org.apache.xmlbeans.XmlCursor;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentBlock;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
|
||||
/**
|
||||
* Experimental class to offer rudimentary read-only processing of
|
||||
* of the contentblock of an SDT/ContentControl.
|
||||
*
|
||||
*
|
||||
*
|
||||
* WARNING - APIs expected to change rapidly
|
||||
*
|
||||
*/
|
||||
public class XWPFSDTContent {
|
||||
|
||||
// private final IBody part;
|
||||
// private final XWPFDocument document;
|
||||
private List<XWPFParagraph> paragraphs = new ArrayList<XWPFParagraph>();
|
||||
private List<XWPFTable> tables = new ArrayList<XWPFTable>();
|
||||
private List<XWPFRun> runs = new ArrayList<XWPFRun>();
|
||||
private List<XWPFSDT> contentControls = new ArrayList<XWPFSDT>();
|
||||
private List<ISDTContents> bodyElements = new ArrayList<ISDTContents>();
|
||||
|
||||
public XWPFSDTContent(CTSdtContentRun sdtRun, IBody part, IRunBody parent){
|
||||
for (CTR ctr : sdtRun.getRList()){
|
||||
XWPFRun run = new XWPFRun((CTR) ctr, parent);
|
||||
runs.add(run);
|
||||
bodyElements.add(run);
|
||||
}
|
||||
}
|
||||
public XWPFSDTContent(CTSdtContentBlock block, IBody part, IRunBody parent){
|
||||
XmlCursor cursor = block.newCursor();
|
||||
cursor.selectPath("./*");
|
||||
while (cursor.toNextSelection()) {
|
||||
XmlObject o = cursor.getObject();
|
||||
if (o instanceof CTP) {
|
||||
XWPFParagraph p = new XWPFParagraph((CTP) o, part);
|
||||
bodyElements.add(p);
|
||||
paragraphs.add(p);
|
||||
} else if (o instanceof CTTbl) {
|
||||
XWPFTable t = new XWPFTable((CTTbl) o, part);
|
||||
bodyElements.add(t);
|
||||
tables.add(t);
|
||||
} else if (o instanceof CTSdtBlock){
|
||||
XWPFSDT c = new XWPFSDT(((CTSdtBlock)o), part);
|
||||
bodyElements.add(c);
|
||||
contentControls.add(c);
|
||||
} else if (o instanceof CTR) {
|
||||
XWPFRun run = new XWPFRun((CTR) o, parent);
|
||||
runs.add(run);
|
||||
bodyElements.add(run);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getText(){
|
||||
StringBuilder text = new StringBuilder();
|
||||
for (int i = 0; i < bodyElements.size(); i++){
|
||||
Object o = bodyElements.get(i);
|
||||
if (o instanceof XWPFParagraph){
|
||||
text.append(((XWPFParagraph)o).getText());
|
||||
} else if (o instanceof XWPFTable){
|
||||
text.append(((XWPFTable)o).getText());
|
||||
} else if (o instanceof XWPFSDT){
|
||||
text.append(((XWPFSDT)o).getContent().getText());
|
||||
} else if (o instanceof XWPFRun){
|
||||
text.append(((XWPFRun)o).toString());
|
||||
}
|
||||
if (i < bodyElements.size()-1){
|
||||
text.append("\n");
|
||||
}
|
||||
}
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
public String toString(){
|
||||
return getText();
|
||||
}
|
||||
}
|
@ -43,7 +43,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTblWidth;
|
||||
* <p>Specifies the contents of a table present in the document. A table is a set
|
||||
* of paragraphs (and other block-level content) arranged in rows and columns.</p>
|
||||
*/
|
||||
public class XWPFTable implements IBodyElement {
|
||||
public class XWPFTable implements IBodyElement, ISDTContents {
|
||||
protected StringBuffer text = new StringBuffer();
|
||||
private CTTbl ctTbl;
|
||||
protected List<XWPFTableRow> tableRows;
|
||||
|
@ -28,6 +28,8 @@ import org.apache.xmlbeans.XmlCursor;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTShd;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
|
||||
@ -97,6 +99,15 @@ public class XWPFTableCell implements IBody {
|
||||
tables.add(t);
|
||||
bodyElements.add(t);
|
||||
}
|
||||
if (o instanceof CTSdtBlock){
|
||||
XWPFSDT c = new XWPFSDT((CTSdtBlock)o, this);
|
||||
bodyElements.add(c);
|
||||
}
|
||||
// run-level content control found directly among the cell's children
if (o instanceof CTSdtRun){
    XWPFSDT c = new XWPFSDT((CTSdtRun)o, this);
    bodyElements.add(c);
}
|
||||
}
|
||||
cursor.dispose();
|
||||
}
|
||||
@ -407,6 +418,48 @@ public class XWPFTableCell implements IBody {
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* extracts all text recursively through embedded tables and embedded SDTs
|
||||
*/
|
||||
public String getTextRecursively(){
|
||||
|
||||
StringBuffer text = new StringBuffer();
|
||||
for (int i = 0; i < bodyElements.size(); i++){
|
||||
boolean isLast = (i== bodyElements.size()-1)? true : false;
|
||||
appendBodyElementText(text, bodyElements.get(i), isLast);
|
||||
}
|
||||
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
private void appendBodyElementText(StringBuffer text, IBodyElement e, boolean isLast){
|
||||
if (e instanceof XWPFParagraph){
|
||||
text.append(((XWPFParagraph)e).getText());
|
||||
if (isLast == false){
|
||||
text.append('\t');
|
||||
}
|
||||
} else if (e instanceof XWPFTable){
|
||||
XWPFTable eTable = (XWPFTable)e;
|
||||
for (XWPFTableRow row : eTable.getRows()){
|
||||
for (XWPFTableCell cell : row.getTableCells()){
|
||||
List<IBodyElement> localBodyElements = cell.getBodyElements();
|
||||
for (int i = 0; i < localBodyElements.size(); i++){
|
||||
boolean localIsLast = (i== localBodyElements.size()-1)? true : false;
|
||||
appendBodyElementText(text, localBodyElements.get(i), localIsLast);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isLast == false){
|
||||
text.append('\n');
|
||||
}
|
||||
} else if (e instanceof XWPFSDT){
|
||||
text.append(((XWPFSDT)e).getContent().getText());
|
||||
if (isLast == false){
|
||||
text.append('\t');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* get the TableCell which belongs to the TableCell
|
||||
|
@ -132,6 +132,9 @@ public class XWPFTableRow {
|
||||
for (CTTc tableCell : ctRow.getTcList()) {
|
||||
cells.add(new XWPFTableCell(tableCell, this, table.getBody()));
|
||||
}
|
||||
//TODO: it is possible to have an SDT that contains a cell within a row;
|
||||
//need to modify this code so that it pulls out SDT wrappers around cells, too.
|
||||
|
||||
this.tableCells = cells;
|
||||
}
|
||||
return tableCells;
|
||||
|
@ -80,10 +80,10 @@ public class TestXWPFWordExtractor extends TestCase {
|
||||
" \n(V) ILLUSTRATIVE CASES\n\n"
|
||||
));
|
||||
assertTrue(text.contains(
|
||||
"As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
|
||||
"As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n"// \n\n\n"
|
||||
));
|
||||
assertTrue(text.endsWith(
|
||||
"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
|
||||
"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n \n\n\n"
|
||||
));
|
||||
|
||||
// Check number of paragraphs
|
||||
@ -317,4 +317,39 @@ public class TestXWPFWordExtractor extends TestCase {
|
||||
|
||||
extractor.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for basic extraction of SDT content
|
||||
* @throws IOException
|
||||
*/
|
||||
public void testSimpleControlContent() throws IOException {
|
||||
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug54849.docx");
|
||||
String[] targs = new String[]{
|
||||
"header_rich_text",
|
||||
"rich_text",
|
||||
"rich_text_pre_table\nrich_text_cell1\t\t\t\n\nrich_text_post_table",
|
||||
"plain_text_no_newlines",
|
||||
"plain_text_with_newlines1\nplain_text_with_newlines2\n",
|
||||
"watermelon\n",
|
||||
"dirt\n",
|
||||
"4/16/2013\n",
|
||||
"rich_text_in_paragraph_in_cell",
|
||||
"footer_rich_text",
|
||||
"footnote_sdt",
|
||||
"endnote_sdt"
|
||||
};
|
||||
XWPFWordExtractor ex = new XWPFWordExtractor(doc);
|
||||
String s = ex.getText().toLowerCase();
|
||||
int hits = 0;
|
||||
|
||||
for (String targ : targs){
|
||||
boolean hit = false;
|
||||
if (s.indexOf(targ) > -1){
|
||||
hit = true;
|
||||
hits++;
|
||||
}
|
||||
assertEquals("controlled content loading-"+targ, true, hit);
|
||||
}
|
||||
assertEquals("controlled content loading hit count", targs.length, hits);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,158 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.xwpf.XWPFTestDataSamples;
|
||||
|
||||
public final class TestXWPFSDT extends TestCase {
|
||||
|
||||
/**
|
||||
* Test simple tag and title extraction from SDT
|
||||
* @throws Exception
|
||||
*/
|
||||
public void testTagTitle() throws Exception {
|
||||
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug54849.docx");
|
||||
String tag = null;
|
||||
String title= null;
|
||||
List<XWPFSDT> sdts = extractAllSDTs(doc);
|
||||
for (XWPFSDT sdt :sdts){
|
||||
if (sdt.getContent().toString().equals("Rich_text")){
|
||||
tag = "MyTag";
|
||||
title = "MyTitle";
|
||||
break;
|
||||
}
|
||||
}
|
||||
// TODO Fix footnotes issues then enable
|
||||
// assertEquals("controls size", 12, sdts.size());
|
||||
|
||||
assertEquals("tag", "MyTag", tag);
|
||||
assertEquals("title", "MyTitle", title);
|
||||
}
|
||||
|
||||
|
||||
public void testGetSDTs() throws Exception{
|
||||
String[] contents = new String[]{
|
||||
"header_rich_text",
|
||||
"Rich_text",
|
||||
"Rich_text_pre_table\nRich_text_cell1\t\t\t\n\nRich_text_post_table",
|
||||
"Plain_text_no_newlines",
|
||||
"Plain_text_with_newlines1\nplain_text_with_newlines2",
|
||||
"Watermelon",
|
||||
"Dirt",
|
||||
"4/16/2013",
|
||||
"rich_text_in_paragraph_in_cell",
|
||||
"Footer_rich_text",
|
||||
"Footnote_sdt",
|
||||
"Endnote_sdt"
|
||||
|
||||
};
|
||||
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug54849.docx");
|
||||
List<XWPFSDT> sdts = extractAllSDTs(doc);
|
||||
|
||||
// TODO Fix footnotes issue
|
||||
/*
|
||||
assertEquals("number of sdts", contents.length, sdts.size());
|
||||
|
||||
for (int i = 0; i < sdts.size(); i++){//contents.length; i++){
|
||||
XWPFSDT sdt = sdts.get(i);
|
||||
|
||||
assertEquals(i+ ": " + contents[i], contents[i], sdt.getContent().toString());
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
public void testFailureToGetSDTAsCell() throws Exception{
|
||||
/**
|
||||
* The current code fails to extract an sdt if it comprises/is the parent
|
||||
* of a cell in a table.
|
||||
*/
|
||||
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug54849.docx");
|
||||
List<XWPFSDT> sdts = extractAllSDTs(doc);
|
||||
boolean found = false;
|
||||
for (XWPFSDT sdt : sdts){
|
||||
if (sdt.getContent().getText().toLowerCase().indexOf("rich_text_in_cell") > -1){
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
assertEquals("SDT as cell known failure", false, found);
|
||||
}
|
||||
|
||||
private List<XWPFSDT> extractAllSDTs(XWPFDocument doc){
|
||||
|
||||
List<XWPFSDT> sdts = new ArrayList<XWPFSDT>();
|
||||
|
||||
List<XWPFHeader> headers = doc.getHeaderList();
|
||||
for (XWPFHeader header : headers){
|
||||
sdts.addAll(extractSDTsFromBodyElements(header.getBodyElements()));
|
||||
}
|
||||
sdts.addAll(extractSDTsFromBodyElements(doc.getBodyElements()));
|
||||
|
||||
List<XWPFFooter> footers = doc.getFooterList();
|
||||
for (XWPFFooter footer : footers){
|
||||
sdts.addAll(extractSDTsFromBodyElements(footer.getBodyElements()));
|
||||
}
|
||||
|
||||
for (XWPFFootnote footnote : doc.getFootnotes()){
|
||||
|
||||
sdts.addAll(extractSDTsFromBodyElements(footnote.getBodyElements()));
|
||||
}
|
||||
for (Map.Entry<Integer, XWPFFootnote> e : doc.endnotes.entrySet()){
|
||||
sdts.addAll(extractSDTsFromBodyElements(e.getValue().getBodyElements()));
|
||||
}
|
||||
return sdts;
|
||||
}
|
||||
|
||||
private List<XWPFSDT> extractSDTsFromBodyElements(List<IBodyElement> elements){
|
||||
List<XWPFSDT> sdts = new ArrayList<XWPFSDT>();
|
||||
for (IBodyElement e : elements){
|
||||
if (e instanceof XWPFSDT){
|
||||
XWPFSDT sdt = (XWPFSDT)e;
|
||||
sdts.add(sdt);
|
||||
} else if (e instanceof XWPFParagraph){
|
||||
|
||||
XWPFParagraph p = (XWPFParagraph)e;
|
||||
for (IRunElement e2 : p.getIRuns()){
|
||||
if (e2 instanceof XWPFSDT){
|
||||
XWPFSDT sdt = (XWPFSDT)e2;
|
||||
sdts.add(sdt);
|
||||
}
|
||||
}
|
||||
} else if (e instanceof XWPFTable){
|
||||
XWPFTable table = (XWPFTable)e;
|
||||
sdts.addAll(extractSDTsFromTable(table));
|
||||
}
|
||||
}
|
||||
return sdts;
|
||||
}
|
||||
|
||||
private List<XWPFSDT> extractSDTsFromTable(XWPFTable table){
|
||||
List<XWPFSDT> sdts = new ArrayList<XWPFSDT>();
|
||||
for (XWPFTableRow r : table.getRows()){
|
||||
for (XWPFTableCell c : r.getTableCells()){
|
||||
sdts.addAll(extractSDTsFromBodyElements(c.getBodyElements()));
|
||||
}
|
||||
}
|
||||
return sdts;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user