Support for extraction of footnotes from docx files, see Bugzilla 45556
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795328 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3304fa1887
commit
fa31a65d14
@ -33,6 +33,8 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.5-beta7" date="2009-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">45556 - Fixed ExtractorFactory to support .xltx and .dotx files</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45556 - Support for extraction of footnotes from docx files</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47520 - Initial support for custom XML mappings in XSSF</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47460 - Fixed NPE when retrieving core properties from a newly created workbook</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47498 - Fixed HyperlinkRecord to properly handle URL monikers</action>
|
||||
@ -41,7 +43,7 @@
|
||||
<action dev="POI-DEVELOPERS" type="fix">47448 - Allow HSSFEventFactory to handle non-zero padding at the end of the workbook stream</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47400 - Support fo text extraction of footnotes, endnotes and comments in HWPF</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47400 - Support for text extraction of footnotes, endnotes and comments in HWPF</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47412 - Fixed concurrency issue with EscherProperties.initProps()</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles</action>
|
||||
|
@ -66,6 +66,24 @@ public final class XSSFRelation extends POIXMLRelation {
|
||||
"/xl/workbook.xml",
|
||||
null
|
||||
);
|
||||
public static final XSSFRelation TEMPLATE_WORKBOOK = new XSSFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/xl/workbook.xml",
|
||||
null
|
||||
);
|
||||
public static final XSSFRelation MACRO_TEMPLATE_WORKBOOK = new XSSFRelation(
|
||||
"application/vnd.ms-excel.template.macroEnabled.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/xl/workbook.xml",
|
||||
null
|
||||
);
|
||||
public static final XSSFRelation MACRO_ADDIN_WORKBOOK = new XSSFRelation(
|
||||
"application/vnd.ms-excel.addin.macroEnabled.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/xl/workbook.xml",
|
||||
null
|
||||
);
|
||||
public static final XSSFRelation WORKSHEET = new XSSFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet",
|
||||
|
@ -19,7 +19,7 @@ package org.apache.poi.xwpf.model;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
|
||||
/**
|
||||
* Decorator class for XWPFParagraph allowing to add hyperlinks
|
||||
|
@ -30,15 +30,7 @@ import org.apache.xmlbeans.XmlOptions;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.*;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
|
||||
@ -60,6 +52,7 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
protected List<XWPFHyperlink> hyperlinks;
|
||||
protected List<XWPFParagraph> paragraphs;
|
||||
protected List<XWPFTable> tables;
|
||||
protected Map<Integer, XWPFFootnote> footnotes;
|
||||
|
||||
/** Handles the joy of different headers/footers for different pages */
|
||||
private XWPFHeaderFooterPolicy headerFooterPolicy;
|
||||
@ -87,6 +80,7 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
comments = new ArrayList<XWPFComment>();
|
||||
paragraphs = new ArrayList<XWPFParagraph>();
|
||||
tables= new ArrayList<XWPFTable>();
|
||||
footnotes = new HashMap<Integer, XWPFFootnote>();
|
||||
|
||||
try {
|
||||
DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream());
|
||||
@ -94,6 +88,8 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
|
||||
CTBody body = ctDocument.getBody();
|
||||
|
||||
initFootnotes();
|
||||
|
||||
// filling paragraph list
|
||||
for (CTP p : body.getPArray()) {
|
||||
paragraphs.add(new XWPFParagraph(p, this));
|
||||
@ -101,7 +97,7 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
|
||||
// Get any tables
|
||||
for(CTTbl table : body.getTblArray()) {
|
||||
tables.add(new XWPFTable(table));
|
||||
tables.add(new XWPFTable(this, table));
|
||||
}
|
||||
|
||||
// Sort out headers and footers
|
||||
@ -118,7 +114,6 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
}
|
||||
|
||||
initHyperlinks();
|
||||
|
||||
} catch (XmlException e) {
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
@ -139,6 +134,19 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
}
|
||||
}
|
||||
|
||||
private void initFootnotes() throws XmlException, IOException {
|
||||
for(POIXMLDocumentPart p : getRelations()){
|
||||
String relation = p.getPackageRelationship().getRelationshipType();
|
||||
if(relation.equals(XWPFRelation.FOOTNOTE.getRelation())){
|
||||
FootnotesDocument footnotesDocument = FootnotesDocument.Factory.parse(p.getPackagePart().getInputStream());
|
||||
|
||||
for(CTFtnEdn ctFtnEdn : footnotesDocument.getFootnotes().getFootnoteArray()) {
|
||||
footnotes.put(ctFtnEdn.getId().intValue(), new XWPFFootnote(this, ctFtnEdn));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new WordprocessingML package and set up the default minimal content
|
||||
*/
|
||||
@ -205,6 +213,15 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public XWPFFootnote getFootnoteByID(int id) {
|
||||
return footnotes.get(id);
|
||||
}
|
||||
|
||||
public Collection<XWPFFootnote> getFootnotes() {
|
||||
return footnotes == null ? new ArrayList<XWPFFootnote>() : footnotes.values();
|
||||
}
|
||||
|
||||
public XWPFHyperlink[] getHyperlinks() {
|
||||
return hyperlinks.toArray(
|
||||
new XWPFHyperlink[hyperlinks.size()]
|
||||
@ -323,7 +340,7 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
* @return a new table
|
||||
*/
|
||||
public XWPFTable createTable(){
|
||||
return new XWPFTable(ctDocument.getBody().addNewTbl());
|
||||
return new XWPFTable(this, ctDocument.getBody().addNewTbl());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -333,7 +350,7 @@ public class XWPFDocument extends POIXMLDocument {
|
||||
* @return table
|
||||
*/
|
||||
public XWPFTable createTable(int rows, int cols) {
|
||||
return new XWPFTable(ctDocument.getBody().addNewTbl(), rows, cols);
|
||||
return new XWPFTable(this, ctDocument.getBody().addNewTbl(), rows, cols);
|
||||
}
|
||||
}
|
||||
|
||||
|
43
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java
Executable file
43
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java
Executable file
@ -0,0 +1,43 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdn;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class XWPFFootnote implements Iterable<XWPFParagraph> {
|
||||
private List<XWPFParagraph> paragraphs = new ArrayList<XWPFParagraph>();
|
||||
|
||||
public XWPFFootnote(XWPFDocument document, CTFtnEdn body) {
|
||||
for (CTP p : body.getPArray()) {
|
||||
paragraphs.add(new XWPFParagraph(p, document));
|
||||
}
|
||||
}
|
||||
|
||||
public List<XWPFParagraph> getParagraphs() {
|
||||
return paragraphs;
|
||||
}
|
||||
|
||||
public Iterator<XWPFParagraph> iterator(){
|
||||
return paragraphs.iterator();
|
||||
}
|
||||
|
||||
}
|
@ -65,7 +65,8 @@ public abstract class XWPFHeaderFooter {
|
||||
new XWPFTable[headerFooter.getTblArray().length];
|
||||
for(int i=0; i<tables.length; i++) {
|
||||
tables[i] = new XWPFTable(
|
||||
headerFooter.getTblArray(i)
|
||||
null,
|
||||
headerFooter.getTblArray(i)
|
||||
);
|
||||
}
|
||||
return tables;
|
||||
|
@ -21,26 +21,7 @@ import java.util.ArrayList;
|
||||
|
||||
import org.apache.xmlbeans.XmlCursor;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTextAlignment;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBorder;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STJc;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STLineSpacingRule;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTextAlignment;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
@ -58,6 +39,7 @@ public class XWPFParagraph {
|
||||
*/
|
||||
private StringBuffer text = new StringBuffer();
|
||||
private StringBuffer pictureText = new StringBuffer();
|
||||
private StringBuffer footnoteText = new StringBuffer();
|
||||
|
||||
|
||||
protected XWPFParagraph(CTP prgrph) {
|
||||
@ -66,79 +48,96 @@ public class XWPFParagraph {
|
||||
|
||||
|
||||
protected XWPFParagraph(CTP prgrph, XWPFDocument docRef) {
|
||||
this.paragraph = prgrph;
|
||||
this.document = docRef;
|
||||
this.paragraph = prgrph;
|
||||
this.document = docRef;
|
||||
|
||||
if (!isEmpty()) {
|
||||
// All the runs to loop over
|
||||
// TODO - replace this with some sort of XPath expression
|
||||
// to directly find all the CTRs, in the right order
|
||||
ArrayList<CTR> rs = new ArrayList<CTR>();
|
||||
CTR[] tmp;
|
||||
if (!isEmpty()) {
|
||||
// All the runs to loop over
|
||||
// TODO - replace this with some sort of XPath expression
|
||||
// to directly find all the CTRs, in the right order
|
||||
ArrayList<CTR> rs = new ArrayList<CTR>();
|
||||
CTR[] tmp;
|
||||
|
||||
// Get the main text runs
|
||||
tmp = paragraph.getRArray();
|
||||
for (int i = 0; i < tmp.length; i++) {
|
||||
rs.add(tmp[i]);
|
||||
}
|
||||
// Get the main text runs
|
||||
tmp = paragraph.getRArray();
|
||||
for (int i = 0; i < tmp.length; i++) {
|
||||
rs.add(tmp[i]);
|
||||
}
|
||||
|
||||
// Not sure quite what these are, but they hold
|
||||
// more text runs
|
||||
CTSdtRun[] sdts = paragraph.getSdtArray();
|
||||
for (int i = 0; i < sdts.length; i++) {
|
||||
CTSdtContentRun run = sdts[i].getSdtContent();
|
||||
tmp = run.getRArray();
|
||||
for (int j = 0; j < tmp.length; j++) {
|
||||
rs.add(tmp[j]);
|
||||
}
|
||||
}
|
||||
// Not sure quite what these are, but they hold
|
||||
// more text runs
|
||||
CTSdtRun[] sdts = paragraph.getSdtArray();
|
||||
for (int i = 0; i < sdts.length; i++) {
|
||||
CTSdtContentRun run = sdts[i].getSdtContent();
|
||||
tmp = run.getRArray();
|
||||
for (int j = 0; j < tmp.length; j++) {
|
||||
rs.add(tmp[j]);
|
||||
}
|
||||
}
|
||||
|
||||
// Get text of the paragraph
|
||||
for (int j = 0; j < rs.size(); j++) {
|
||||
// Grab the text and tabs of the paragraph
|
||||
// Do so in a way that preserves the ordering
|
||||
XmlCursor c = rs.get(j).newCursor();
|
||||
c.selectPath("./*");
|
||||
while (c.toNextSelection()) {
|
||||
XmlObject o = c.getObject();
|
||||
if (o instanceof CTText) {
|
||||
text.append(((CTText) o).getStringValue());
|
||||
}
|
||||
if (o instanceof CTPTab) {
|
||||
text.append("\t");
|
||||
}
|
||||
}
|
||||
// Get text of the paragraph
|
||||
for (int j = 0; j < rs.size(); j++) {
|
||||
// Grab the text and tabs of the paragraph
|
||||
// Do so in a way that preserves the ordering
|
||||
XmlCursor c = rs.get(j).newCursor();
|
||||
c.selectPath("./*");
|
||||
while (c.toNextSelection()) {
|
||||
XmlObject o = c.getObject();
|
||||
if (o instanceof CTText) {
|
||||
text.append(((CTText) o).getStringValue());
|
||||
}
|
||||
if (o instanceof CTPTab) {
|
||||
text.append("\t");
|
||||
}
|
||||
//got a reference to a footnote
|
||||
if (o instanceof CTFtnEdnRef) {
|
||||
CTFtnEdnRef ftn = (CTFtnEdnRef) o;
|
||||
footnoteText.append("[").append(ftn.getId()).append(": ");
|
||||
XWPFFootnote footnote = document.getFootnoteByID(ftn.getId().intValue());
|
||||
|
||||
// Loop over pictures inside our
|
||||
// paragraph, looking for text in them
|
||||
CTPicture[] picts = rs.get(j).getPictArray();
|
||||
for (int k = 0; k < picts.length; k++) {
|
||||
XmlObject[] t = picts[k]
|
||||
.selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
|
||||
for (int m = 0; m < t.length; m++) {
|
||||
NodeList kids = t[m].getDomNode().getChildNodes();
|
||||
for (int n = 0; n < kids.getLength(); n++) {
|
||||
if (kids.item(n) instanceof Text) {
|
||||
pictureText.append("\n");
|
||||
pictureText.append(kids.item(n).getNodeValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
boolean first = true;
|
||||
for (XWPFParagraph p : footnote.getParagraphs()) {
|
||||
if (!first) {
|
||||
footnoteText.append("\n");
|
||||
first = false;
|
||||
}
|
||||
footnoteText.append(p.getText());
|
||||
}
|
||||
|
||||
footnoteText.append("]");
|
||||
}
|
||||
}
|
||||
|
||||
// Loop over pictures inside our
|
||||
// paragraph, looking for text in them
|
||||
CTPicture[] picts = rs.get(j).getPictArray();
|
||||
for (int k = 0; k < picts.length; k++) {
|
||||
XmlObject[] t = picts[k]
|
||||
.selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
|
||||
for (int m = 0; m < t.length; m++) {
|
||||
NodeList kids = t[m].getDomNode().getChildNodes();
|
||||
for (int n = 0; n < kids.getLength(); n++) {
|
||||
if (kids.item(n) instanceof Text) {
|
||||
pictureText.append("\n");
|
||||
pictureText.append(kids.item(n).getNodeValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public CTP getCTP() {
|
||||
return paragraph;
|
||||
return paragraph;
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return !paragraph.getDomNode().hasChildNodes();
|
||||
return !paragraph.getDomNode().hasChildNodes();
|
||||
}
|
||||
|
||||
public XWPFDocument getDocument() {
|
||||
return document;
|
||||
return document;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -146,7 +145,9 @@ public class XWPFParagraph {
|
||||
* in it.
|
||||
*/
|
||||
public String getText() {
|
||||
return getParagraphText() + getPictureText();
|
||||
StringBuffer out = new StringBuffer();
|
||||
out.append(text).append(footnoteText).append(pictureText);
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -154,14 +155,23 @@ public class XWPFParagraph {
|
||||
* paragraph
|
||||
*/
|
||||
public String getParagraphText() {
|
||||
return text.toString();
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns any text from any suitable pictures in the paragraph
|
||||
*/
|
||||
public String getPictureText() {
|
||||
return pictureText.toString();
|
||||
return pictureText.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the footnote text of the paragraph
|
||||
*
|
||||
* @return the footnote text or empty string if the paragraph does not have footnotes
|
||||
*/
|
||||
public String getFootnoteText() {
|
||||
return footnoteText.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -170,7 +180,7 @@ public class XWPFParagraph {
|
||||
* @return a new text run
|
||||
*/
|
||||
public XWPFRun createRun() {
|
||||
return new XWPFRun(paragraph.addNewR(), this);
|
||||
return new XWPFRun(paragraph.addNewR(), this);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -350,12 +360,12 @@ public class XWPFParagraph {
|
||||
* @see Borders a list of all types of borders
|
||||
*/
|
||||
public void setBorderBottom(Borders border) {
|
||||
CTPBdr ct = getCTPBrd(true);
|
||||
CTBorder pr = ct.isSetBottom() ? ct.getBottom() : ct.addNewBottom();
|
||||
if (border.getValue() == Borders.NONE.getValue())
|
||||
ct.unsetBottom();
|
||||
else
|
||||
pr.setVal(STBorder.Enum.forInt(border.getValue()));
|
||||
CTPBdr ct = getCTPBrd(true);
|
||||
CTBorder pr = ct.isSetBottom() ? ct.getBottom() : ct.addNewBottom();
|
||||
if (border.getValue() == Borders.NONE.getValue())
|
||||
ct.unsetBottom();
|
||||
else
|
||||
pr.setVal(STBorder.Enum.forInt(border.getValue()));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -367,13 +377,13 @@ public class XWPFParagraph {
|
||||
* @see Borders a list of all types of borders
|
||||
*/
|
||||
public Borders getBorderBottom() {
|
||||
CTPBdr border = getCTPBrd(false);
|
||||
CTBorder ct = null;
|
||||
if (border != null) {
|
||||
ct = border.getBottom();
|
||||
}
|
||||
STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
|
||||
return Borders.valueOf(ptrn.intValue());
|
||||
CTPBdr border = getCTPBrd(false);
|
||||
CTBorder ct = null;
|
||||
if (border != null) {
|
||||
ct = border.getBottom();
|
||||
}
|
||||
STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
|
||||
return Borders.valueOf(ptrn.intValue());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -399,12 +409,12 @@ public class XWPFParagraph {
|
||||
* @see Borders for a list of all possible borders
|
||||
*/
|
||||
public void setBorderLeft(Borders border) {
|
||||
CTPBdr ct = getCTPBrd(true);
|
||||
CTBorder pr = ct.isSetLeft() ? ct.getLeft() : ct.addNewLeft();
|
||||
if (border.getValue() == Borders.NONE.getValue())
|
||||
ct.unsetLeft();
|
||||
else
|
||||
pr.setVal(STBorder.Enum.forInt(border.getValue()));
|
||||
CTPBdr ct = getCTPBrd(true);
|
||||
CTBorder pr = ct.isSetLeft() ? ct.getLeft() : ct.addNewLeft();
|
||||
if (border.getValue() == Borders.NONE.getValue())
|
||||
ct.unsetLeft();
|
||||
else
|
||||
pr.setVal(STBorder.Enum.forInt(border.getValue()));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -416,13 +426,13 @@ public class XWPFParagraph {
|
||||
* @see Borders for a list of all possible borders
|
||||
*/
|
||||
public Borders getBorderLeft() {
|
||||
CTPBdr border = getCTPBrd(false);
|
||||
CTBorder ct = null;
|
||||
if (border != null) {
|
||||
ct = border.getLeft();
|
||||
}
|
||||
STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
|
||||
return Borders.valueOf(ptrn.intValue());
|
||||
CTPBdr border = getCTPBrd(false);
|
||||
CTBorder ct = null;
|
||||
if (border != null) {
|
||||
ct = border.getLeft();
|
||||
}
|
||||
STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
|
||||
return Borders.valueOf(ptrn.intValue());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -448,12 +458,12 @@ public class XWPFParagraph {
|
||||
* @see Borders for a list of all possible borders
|
||||
*/
|
||||
public void setBorderRight(Borders border) {
|
||||
CTPBdr ct = getCTPBrd(true);
|
||||
CTBorder pr = ct.isSetRight() ? ct.getRight() : ct.addNewRight();
|
||||
if (border.getValue() == Borders.NONE.getValue())
|
||||
ct.unsetRight();
|
||||
else
|
||||
pr.setVal(STBorder.Enum.forInt(border.getValue()));
|
||||
CTPBdr ct = getCTPBrd(true);
|
||||
CTBorder pr = ct.isSetRight() ? ct.getRight() : ct.addNewRight();
|
||||
if (border.getValue() == Borders.NONE.getValue())
|
||||
ct.unsetRight();
|
||||
else
|
||||
pr.setVal(STBorder.Enum.forInt(border.getValue()));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -465,13 +475,13 @@ public class XWPFParagraph {
|
||||
* @see Borders for a list of all possible borders
|
||||
*/
|
||||
public Borders getBorderRight() {
|
||||
CTPBdr border = getCTPBrd(false);
|
||||
CTBorder ct = null;
|
||||
if (border != null) {
|
||||
ct = border.getRight();
|
||||
}
|
||||
STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
|
||||
return Borders.valueOf(ptrn.intValue());
|
||||
CTPBdr border = getCTPBrd(false);
|
||||
CTBorder ct = null;
|
||||
if (border != null) {
|
||||
ct = border.getRight();
|
||||
}
|
||||
STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
|
||||
return Borders.valueOf(ptrn.intValue());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -501,12 +511,12 @@ public class XWPFParagraph {
|
||||
* @see Borders for a list of all possible borders
|
||||
*/
|
||||
public void setBorderBetween(Borders border) {
|
||||
CTPBdr ct = getCTPBrd(true);
|
||||
CTBorder pr = ct.isSetBetween() ? ct.getBetween() : ct.addNewBetween();
|
||||
if (border.getValue() == Borders.NONE.getValue())
|
||||
ct.unsetBetween();
|
||||
else
|
||||
pr.setVal(STBorder.Enum.forInt(border.getValue()));
|
||||
CTPBdr ct = getCTPBrd(true);
|
||||
CTBorder pr = ct.isSetBetween() ? ct.getBetween() : ct.addNewBetween();
|
||||
if (border.getValue() == Borders.NONE.getValue())
|
||||
ct.unsetBetween();
|
||||
else
|
||||
pr.setVal(STBorder.Enum.forInt(border.getValue()));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -518,13 +528,13 @@ public class XWPFParagraph {
|
||||
* @see Borders for a list of all possible borders
|
||||
*/
|
||||
public Borders getBorderBetween() {
|
||||
CTPBdr border = getCTPBrd(false);
|
||||
CTBorder ct = null;
|
||||
if (border != null) {
|
||||
ct = border.getBetween();
|
||||
}
|
||||
STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
|
||||
return Borders.valueOf(ptrn.intValue());
|
||||
CTPBdr border = getCTPBrd(false);
|
||||
CTBorder ct = null;
|
||||
if (border != null) {
|
||||
ct = border.getBetween();
|
||||
}
|
||||
STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
|
||||
return Borders.valueOf(ptrn.intValue());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -544,13 +554,13 @@ public class XWPFParagraph {
|
||||
* boolean value
|
||||
*/
|
||||
public void setPageBreak(boolean pageBreak) {
|
||||
CTPPr ppr = getCTPPr();
|
||||
CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
|
||||
.getPageBreakBefore() : ppr.addNewPageBreakBefore();
|
||||
if (pageBreak)
|
||||
ct_pageBreak.setVal(STOnOff.TRUE);
|
||||
else
|
||||
ct_pageBreak.setVal(STOnOff.FALSE);
|
||||
CTPPr ppr = getCTPPr();
|
||||
CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
|
||||
.getPageBreakBefore() : ppr.addNewPageBreakBefore();
|
||||
if (pageBreak)
|
||||
ct_pageBreak.setVal(STOnOff.TRUE);
|
||||
else
|
||||
ct_pageBreak.setVal(STOnOff.FALSE);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -569,14 +579,14 @@ public class XWPFParagraph {
|
||||
* @return boolean - if page break is set
|
||||
*/
|
||||
public boolean isPageBreak() {
|
||||
CTPPr ppr = getCTPPr();
|
||||
CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
|
||||
.getPageBreakBefore() : null;
|
||||
if (ct_pageBreak != null
|
||||
&& ct_pageBreak.getVal().intValue() == STOnOff.INT_TRUE)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
CTPPr ppr = getCTPPr();
|
||||
CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
|
||||
.getPageBreakBefore() : null;
|
||||
if (ct_pageBreak != null
|
||||
&& ct_pageBreak.getVal().intValue() == STOnOff.INT_TRUE)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -640,7 +650,7 @@ public class XWPFParagraph {
|
||||
* paragraph in the document in absolute units.
|
||||
*
|
||||
* @return bigInteger - value representing the spacing after the paragraph
|
||||
* @see #setSpacingAfterLines(int)
|
||||
* @see #setSpacingAfterLines(int)
|
||||
*/
|
||||
public int getSpacingAfterLines() {
|
||||
CTSpacing spacing = getCTSpacing(false);
|
||||
@ -902,12 +912,12 @@ public class XWPFParagraph {
|
||||
* @param wrap - boolean
|
||||
*/
|
||||
public void setWordWrap(boolean wrap) {
|
||||
CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
|
||||
.getWordWrap() : getCTPPr().addNewWordWrap();
|
||||
if (wrap)
|
||||
wordWrap.setVal(STOnOff.TRUE);
|
||||
else
|
||||
wordWrap.unsetVal();
|
||||
CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
|
||||
.getWordWrap() : getCTPPr().addNewWordWrap();
|
||||
if (wrap)
|
||||
wordWrap.setVal(STOnOff.TRUE);
|
||||
else
|
||||
wordWrap.unsetVal();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -919,14 +929,14 @@ public class XWPFParagraph {
|
||||
* @return boolean
|
||||
*/
|
||||
public boolean isWordWrap() {
|
||||
CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
|
||||
.getWordWrap() : null;
|
||||
if (wordWrap != null) {
|
||||
return (wordWrap.getVal() == STOnOff.ON
|
||||
|| wordWrap.getVal() == STOnOff.TRUE || wordWrap.getVal() == STOnOff.X_1) ? true
|
||||
: false;
|
||||
} else
|
||||
return false;
|
||||
CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
|
||||
.getWordWrap() : null;
|
||||
if (wordWrap != null) {
|
||||
return (wordWrap.getVal() == STOnOff.ON
|
||||
|| wordWrap.getVal() == STOnOff.TRUE || wordWrap.getVal() == STOnOff.X_1) ? true
|
||||
: false;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -40,6 +40,24 @@ public final class XWPFRelation extends POIXMLRelation {
|
||||
"/word/document.xml",
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation TEMPLATE = new XWPFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/word/document.xml",
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation MACRO_DOCUMENT = new XWPFRelation(
|
||||
"application/vnd.ms-word.document.macroEnabled.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/word/document.xml",
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation MACRO_TEMPLATE_DOCUMENT = new XWPFRelation(
|
||||
"application/vnd.ms-word.template.macroEnabledTemplate.main+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
|
||||
"/word/document.xml",
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation FONT_TABLE = new XWPFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
|
||||
@ -88,6 +106,12 @@ public final class XWPFRelation extends POIXMLRelation {
|
||||
null,
|
||||
null
|
||||
);
|
||||
public static final XWPFRelation FOOTNOTE = new XWPFRelation(
|
||||
null,
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
|
||||
private XWPFRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
|
||||
|
@ -42,8 +42,8 @@ public class XWPFTable {
|
||||
private CTTbl ctTbl;
|
||||
|
||||
|
||||
public XWPFTable(CTTbl table, int row, int col) {
|
||||
this(table);
|
||||
public XWPFTable(XWPFDocument doc, CTTbl table, int row, int col) {
|
||||
this(doc, table);
|
||||
for (int i = 0; i < row; i++) {
|
||||
XWPFTableRow tabRow = (getRow(i) == null) ? createRow() : getRow(i);
|
||||
for (int k = 0; k < col; k++) {
|
||||
@ -54,7 +54,7 @@ public class XWPFTable {
|
||||
}
|
||||
|
||||
|
||||
public XWPFTable(CTTbl table) {
|
||||
public XWPFTable(XWPFDocument doc, CTTbl table) {
|
||||
this.ctTbl = table;
|
||||
|
||||
// is an empty table: I add one row and one column as default
|
||||
@ -65,7 +65,7 @@ public class XWPFTable {
|
||||
StringBuffer rowText = new StringBuffer();
|
||||
for (CTTc cell : row.getTcArray()) {
|
||||
for (CTP ctp : cell.getPArray()) {
|
||||
XWPFParagraph p = new XWPFParagraph(ctp, null);
|
||||
XWPFParagraph p = new XWPFParagraph(ctp, doc);
|
||||
if (rowText.length() > 0) {
|
||||
rowText.append('\t');
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
package org.apache.poi.xwpf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
@ -27,202 +28,176 @@ import junit.framework.TestCase;
|
||||
* Tests for XWPFWordExtractor
|
||||
*/
|
||||
public class TestXWPFWordExtractor extends TestCase {
|
||||
/**
|
||||
* A very simple file
|
||||
*/
|
||||
private XWPFDocument xmlA;
|
||||
private File fileA;
|
||||
/**
|
||||
* A fairly complex file
|
||||
*/
|
||||
private XWPFDocument xmlB;
|
||||
private File fileB;
|
||||
/**
|
||||
* With a simplish header+footer
|
||||
*/
|
||||
private XWPFDocument xmlC;
|
||||
private File fileC;
|
||||
/**
|
||||
* With different header+footer on first/rest
|
||||
*/
|
||||
private XWPFDocument xmlD;
|
||||
private File fileD;
|
||||
|
||||
/**
|
||||
* File with hyperlinks
|
||||
*/
|
||||
private XWPFDocument xmlE;
|
||||
private File fileE;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
fileA = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "sample.docx"
|
||||
);
|
||||
fileB = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "IllustrativeCases.docx"
|
||||
);
|
||||
fileC = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "ThreeColHeadFoot.docx"
|
||||
);
|
||||
fileD = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "DiffFirstPageHeadFoot.docx"
|
||||
);
|
||||
fileE = new File(
|
||||
System.getProperty("HWPF.testdata.path") +
|
||||
File.separator + "TestDocument.docx"
|
||||
);
|
||||
assertTrue(fileA.exists());
|
||||
assertTrue(fileB.exists());
|
||||
assertTrue(fileC.exists());
|
||||
assertTrue(fileD.exists());
|
||||
assertTrue(fileE.exists());
|
||||
|
||||
xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
|
||||
xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
|
||||
xmlC = new XWPFDocument(POIXMLDocument.openPackage(fileC.toString()));
|
||||
xmlD = new XWPFDocument(POIXMLDocument.openPackage(fileD.toString()));
|
||||
xmlE = new XWPFDocument(POIXMLDocument.openPackage(fileE.toString()));
|
||||
}
|
||||
/**
|
||||
* Get text out of the simple file
|
||||
*/
|
||||
public void testGetSimpleText() throws Exception {
|
||||
XWPFDocument doc = open("sample.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
/**
|
||||
* Get text out of the simple file
|
||||
*/
|
||||
public void testGetSimpleText() throws Exception {
|
||||
new XWPFWordExtractor(xmlA);
|
||||
new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString()));
|
||||
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlA);
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
// Check contents
|
||||
assertTrue(text.startsWith(
|
||||
"Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
|
||||
));
|
||||
assertTrue(text.endsWith(
|
||||
"Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
|
||||
));
|
||||
|
||||
// Check number of paragraphs
|
||||
int ps = 0;
|
||||
char[] t = text.toCharArray();
|
||||
for (int i = 0; i < t.length; i++) {
|
||||
if(t[i] == '\n') { ps++; }
|
||||
}
|
||||
assertEquals(3, ps);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests getting the text out of a complex file
|
||||
*/
|
||||
public void testGetComplexText() throws Exception {
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlB);
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
char euro = '\u20ac';
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
// Check contents
|
||||
assertTrue(text.startsWith(
|
||||
"Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
|
||||
));
|
||||
assertTrue(text.endsWith(
|
||||
"Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
|
||||
));
|
||||
|
||||
// Check number of paragraphs
|
||||
int ps = 0;
|
||||
char[] t = text.toCharArray();
|
||||
for (int i = 0; i < t.length; i++) {
|
||||
if (t[i] == '\n') {
|
||||
ps++;
|
||||
}
|
||||
}
|
||||
assertEquals(3, ps);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests getting the text out of a complex file
|
||||
*/
|
||||
public void testGetComplexText() throws Exception {
|
||||
XWPFDocument doc = open("IllustrativeCases.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
char euro = '\u20ac';
|
||||
// System.err.println("'"+text.substring(text.length() - 40) + "'");
|
||||
|
||||
// Check contents
|
||||
assertTrue(text.startsWith(
|
||||
" \n(V) ILLUSTRATIVE CASES\n\n"
|
||||
));
|
||||
assertTrue(text.contains(
|
||||
"As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
|
||||
));
|
||||
assertTrue(text.endsWith(
|
||||
"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
|
||||
));
|
||||
|
||||
// Check number of paragraphs
|
||||
int ps = 0;
|
||||
char[] t = text.toCharArray();
|
||||
for (int i = 0; i < t.length; i++) {
|
||||
if(t[i] == '\n') { ps++; }
|
||||
}
|
||||
assertEquals(103, ps);
|
||||
}
|
||||
|
||||
public void testGetWithHyperlinks() throws Exception {
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlE);
|
||||
extractor.getText();
|
||||
extractor.setFetchHyperlinks(true);
|
||||
extractor.getText();
|
||||
|
||||
// Now check contents
|
||||
// TODO - fix once correctly handling contents
|
||||
extractor.setFetchHyperlinks(false);
|
||||
assertEquals(
|
||||
// Check contents
|
||||
assertTrue(text.startsWith(
|
||||
" \n(V) ILLUSTRATIVE CASES\n\n"
|
||||
));
|
||||
assertTrue(text.contains(
|
||||
"As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
|
||||
));
|
||||
assertTrue(text.endsWith(
|
||||
"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
|
||||
));
|
||||
|
||||
// Check number of paragraphs
|
||||
int ps = 0;
|
||||
char[] t = text.toCharArray();
|
||||
for (int i = 0; i < t.length; i++) {
|
||||
if (t[i] == '\n') {
|
||||
ps++;
|
||||
}
|
||||
}
|
||||
assertEquals(103, ps);
|
||||
}
|
||||
|
||||
public void testGetWithHyperlinks() throws Exception {
|
||||
XWPFDocument doc = open("TestDocument.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
// Now check contents
|
||||
// TODO - fix once correctly handling contents
|
||||
extractor.setFetchHyperlinks(false);
|
||||
assertEquals(
|
||||
// "This is a test document\nThis bit is in bold and italic\n" +
|
||||
// "Back to normal\nWe have a hyperlink here, and another.\n",
|
||||
"This is a test document\nThis bit is in bold and italic\n" +
|
||||
"Back to normal\nWe have a here, and .hyperlinkanother\n",
|
||||
extractor.getText()
|
||||
);
|
||||
|
||||
extractor.setFetchHyperlinks(true);
|
||||
assertEquals(
|
||||
"This is a test document\nThis bit is in bold and italic\n" +
|
||||
"Back to normal\nWe have a here, and .hyperlinkanother\n",
|
||||
extractor.getText()
|
||||
);
|
||||
|
||||
extractor.setFetchHyperlinks(true);
|
||||
assertEquals(
|
||||
// "This is a test document\nThis bit is in bold and italic\n" +
|
||||
// "Back to normal\nWe have a hyperlink here, and another.\n",
|
||||
"This is a test document\nThis bit is in bold and italic\n" +
|
||||
"Back to normal\nWe have a here, and .hyperlink <http://poi.apache.org/>another\n",
|
||||
extractor.getText()
|
||||
);
|
||||
}
|
||||
|
||||
public void testHeadersFooters() throws Exception {
|
||||
XWPFWordExtractor extractor =
|
||||
new XWPFWordExtractor(xmlC);
|
||||
extractor.getText();
|
||||
|
||||
assertEquals(
|
||||
"First header column!\tMid header\tRight header!\n" +
|
||||
"This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
|
||||
"\n" +
|
||||
"HEADING TEXT\n" +
|
||||
"\n" +
|
||||
"More on page one\n" +
|
||||
"\n\n" +
|
||||
"End of page 1\n\n" +
|
||||
"This is page two. It also has a three column heading, and a three column footer.\n" +
|
||||
"Footer Left\tFooter Middle\tFooter Right\n",
|
||||
extractor.getText()
|
||||
);
|
||||
|
||||
|
||||
// Now another file, expect multiple headers
|
||||
// and multiple footers
|
||||
extractor =
|
||||
new XWPFWordExtractor(xmlD);
|
||||
extractor.getText();
|
||||
|
||||
assertEquals(
|
||||
"I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
|
||||
"First header column!\tMid header\tRight header!\n" +
|
||||
"This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
|
||||
"\n" +
|
||||
"HEADING TEXT\n" +
|
||||
"\n" +
|
||||
"More on page one\n" +
|
||||
"\n\n" +
|
||||
"End of page 1\n\n" +
|
||||
"This is page two. It also has a three column heading, and a three column footer.\n" +
|
||||
"The footer of the first page\n" +
|
||||
"Footer Left\tFooter Middle\tFooter Right\n",
|
||||
extractor.getText()
|
||||
);
|
||||
}
|
||||
"This is a test document\nThis bit is in bold and italic\n" +
|
||||
"Back to normal\nWe have a here, and .hyperlink <http://poi.apache.org/>another\n",
|
||||
extractor.getText()
|
||||
);
|
||||
}
|
||||
|
||||
public void testHeadersFooters() throws Exception {
|
||||
XWPFDocument doc = open("ThreeColHeadFoot.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
assertEquals(
|
||||
"First header column!\tMid header\tRight header!\n" +
|
||||
"This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
|
||||
"\n" +
|
||||
"HEADING TEXT\n" +
|
||||
"\n" +
|
||||
"More on page one\n" +
|
||||
"\n\n" +
|
||||
"End of page 1\n\n" +
|
||||
"This is page two. It also has a three column heading, and a three column footer.\n" +
|
||||
"Footer Left\tFooter Middle\tFooter Right\n",
|
||||
extractor.getText()
|
||||
);
|
||||
|
||||
// Now another file, expect multiple headers
|
||||
// and multiple footers
|
||||
doc = open("DiffFirstPageHeadFoot.docx");
|
||||
extractor = new XWPFWordExtractor(doc);
|
||||
extractor =
|
||||
new XWPFWordExtractor(doc);
|
||||
extractor.getText();
|
||||
|
||||
assertEquals(
|
||||
"I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
|
||||
"First header column!\tMid header\tRight header!\n" +
|
||||
"This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
|
||||
"\n" +
|
||||
"HEADING TEXT\n" +
|
||||
"\n" +
|
||||
"More on page one\n" +
|
||||
"\n\n" +
|
||||
"End of page 1\n\n" +
|
||||
"This is page two. It also has a three column heading, and a three column footer.\n" +
|
||||
"The footer of the first page\n" +
|
||||
"Footer Left\tFooter Middle\tFooter Right\n",
|
||||
extractor.getText()
|
||||
);
|
||||
}
|
||||
|
||||
public void testFootnotes() throws Exception {
|
||||
XWPFDocument doc = open("footnotes.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
assertTrue(extractor.getText().contains("snoska"));
|
||||
}
|
||||
|
||||
|
||||
public void testTableFootnotes() throws Exception {
|
||||
XWPFDocument doc = open("table_footnotes.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
assertTrue(extractor.getText().contains("snoska"));
|
||||
}
|
||||
|
||||
public void testFormFootnotes() throws Exception {
|
||||
XWPFDocument doc = open("form_footnotes.docx");
|
||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
|
||||
}
|
||||
|
||||
//TODO use the same logic as in HSSFTestDataSamples
|
||||
private XWPFDocument open(String sampleFileName) throws IOException {
|
||||
File file = new File(
|
||||
System.getProperty("HWPF.testdata.path"), sampleFileName);
|
||||
|
||||
try {
|
||||
if(!sampleFileName.equals(file.getCanonicalFile().getName())){
|
||||
throw new RuntimeException("File name is case-sensitive: requested '" + sampleFileName
|
||||
+ "' but actual file is '" + file.getCanonicalFile().getName() + "'");
|
||||
}
|
||||
} catch (IOException e){
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return new XWPFDocument(POIXMLDocument.openPackage(file.getPath()));
|
||||
}
|
||||
}
|
||||
|
@ -43,14 +43,14 @@ public class TestXWPFTable extends TestCase {
|
||||
|
||||
public void testConstructor() {
|
||||
CTTbl ctTable=CTTbl.Factory.newInstance();
|
||||
XWPFTable xtab=new XWPFTable(ctTable);
|
||||
XWPFTable xtab=new XWPFTable(null, ctTable);
|
||||
assertNotNull(xtab);
|
||||
assertEquals(1,ctTable.sizeOfTrArray());
|
||||
assertEquals(1,ctTable.getTrArray(0).sizeOfTcArray());
|
||||
assertNotNull(ctTable.getTrArray(0).getTcArray(0).getPArray(0));
|
||||
|
||||
ctTable=CTTbl.Factory.newInstance();
|
||||
xtab=new XWPFTable(ctTable, 3,2);
|
||||
xtab=new XWPFTable(null, ctTable, 3,2);
|
||||
assertNotNull(xtab);
|
||||
assertEquals(3,ctTable.sizeOfTrArray());
|
||||
assertEquals(2,ctTable.getTrArray(0).sizeOfTcArray());
|
||||
@ -67,7 +67,7 @@ public class TestXWPFTable extends TestCase {
|
||||
CTText text=run.addNewT();
|
||||
text.setStringValue("finally I can write!");
|
||||
|
||||
XWPFTable xtab=new XWPFTable(table);
|
||||
XWPFTable xtab=new XWPFTable(null, table);
|
||||
assertEquals("finally I can write!\n",xtab.getText());
|
||||
}
|
||||
|
||||
@ -84,7 +84,7 @@ public class TestXWPFTable extends TestCase {
|
||||
r3.addNewTc().addNewP();
|
||||
r3.addNewTc().addNewP();
|
||||
|
||||
XWPFTable xtab=new XWPFTable(table);
|
||||
XWPFTable xtab=new XWPFTable(null, table);
|
||||
assertEquals(3,xtab.getNumberOfRows());
|
||||
assertNotNull(xtab.getRow(2));
|
||||
|
||||
@ -95,7 +95,7 @@ public class TestXWPFTable extends TestCase {
|
||||
assertEquals(2,table.getTrArray(0).sizeOfTcArray());
|
||||
|
||||
//check creation of first row
|
||||
xtab=new XWPFTable(CTTbl.Factory.newInstance());
|
||||
xtab=new XWPFTable(null, CTTbl.Factory.newInstance());
|
||||
assertEquals(1,xtab.getCTTbl().getTrArray(0).sizeOfTcArray());
|
||||
}
|
||||
|
||||
@ -104,7 +104,7 @@ public class TestXWPFTable extends TestCase {
|
||||
CTTbl table = CTTbl.Factory.newInstance();
|
||||
table.addNewTblPr().addNewTblW().setW(new BigInteger("1000"));
|
||||
|
||||
XWPFTable xtab=new XWPFTable(table);
|
||||
XWPFTable xtab=new XWPFTable(null, table);
|
||||
|
||||
assertEquals(1000,xtab.getWidth());
|
||||
|
||||
|
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx
Executable file
Binary file not shown.
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx
Executable file
Binary file not shown.
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user