XWPF paragraph improvements - Make XWPFParagraph rely more on XWPFRun and less on internal StringBuffers. Also improve handling of hyperlinks inside XWPFParagraph objects through XWPFHyperlinkRun
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@996899 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
77f5742c75
commit
7fad16fd1b
@ -34,6 +34,8 @@
|
|||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.7-beta3" date="2010-??-??">
|
<release version="3.7-beta3" date="2010-??-??">
|
||||||
|
<action dev="poi-developers" type="fix">Improve handling of Hyperlinks inside XWPFParagraph objects through XWPFHyperlinkRun</action>
|
||||||
|
<action dev="poi-developers" type="fix">Make XWPFParagraph rely more on XWPFRun and less on internal StringBuffers</action>
|
||||||
<action dev="poi-developers" type="add">Add a getBodyElements() method to XWPF IBody, to make access to embedded paragraphs and tables easier</action>
|
<action dev="poi-developers" type="add">Add a getBodyElements() method to XWPF IBody, to make access to embedded paragraphs and tables easier</action>
|
||||||
<action dev="poi-developers" type="add">More XSLFRelation entries for common .pptx file parts</action>
|
<action dev="poi-developers" type="add">More XSLFRelation entries for common .pptx file parts</action>
|
||||||
<action dev="poi-developers" type="fix">49872 - avoid exception in XSSFFormulaEvaluator.evaluateInCell when evaluating shared formulas</action>
|
<action dev="poi-developers" type="fix">49872 - avoid exception in XSSFFormulaEvaluator.evaluateInCell when evaluating shared formulas</action>
|
||||||
|
@ -29,8 +29,11 @@ import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
|
|||||||
import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
|
import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
|
||||||
import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
|
import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFRun;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
|
||||||
@ -103,9 +106,28 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
extractHeaders(text, headerFooterPolicy);
|
extractHeaders(text, headerFooterPolicy);
|
||||||
}
|
}
|
||||||
|
|
||||||
XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
|
// Do the paragraph text
|
||||||
new XWPFHyperlinkDecorator(paragraph, null, fetchHyperlinks));
|
for(XWPFRun run : paragraph.getRuns()) {
|
||||||
text.append(decorator.getText()).append('\n');
|
text.append(run.toString());
|
||||||
|
if(run instanceof XWPFHyperlinkRun && fetchHyperlinks) {
|
||||||
|
XWPFHyperlink link = ((XWPFHyperlinkRun)run).getHyperlink(document);
|
||||||
|
if(link != null)
|
||||||
|
text.append(" <" + link.getURL() + ">");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add comments
|
||||||
|
XWPFCommentsDecorator decorator = new XWPFCommentsDecorator(paragraph, null);
|
||||||
|
text.append(decorator.getCommentText()).append('\n');
|
||||||
|
|
||||||
|
// Do endnotes, footnotes and pictures
|
||||||
|
for(String str : new String[] {
|
||||||
|
paragraph.getFootnoteText(), paragraph.getPictureText()
|
||||||
|
}) {
|
||||||
|
if(str != null && str.length() > 0) {
|
||||||
|
text.append(str + "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ctSectPr!=null) {
|
if (ctSectPr!=null) {
|
||||||
extractFooters(text, headerFooterPolicy);
|
extractFooters(text, headerFooterPolicy);
|
||||||
|
@ -46,6 +46,10 @@ public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getCommentText() {
|
||||||
|
return commentText.toString();
|
||||||
|
}
|
||||||
|
|
||||||
public String getText() {
|
public String getText() {
|
||||||
return super.getText() + commentText;
|
return super.getText() + commentText;
|
||||||
}
|
}
|
||||||
|
@ -19,15 +19,18 @@ package org.apache.poi.xwpf.model;
|
|||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decorator class for XWPFParagraph allowing to add hyperlinks
|
* Decorator class for XWPFParagraph allowing to add hyperlinks
|
||||||
* found in paragraph to its text.
|
* found in paragraph to its text.
|
||||||
*
|
*
|
||||||
* TODO - add the hyperlink text in the right place, and not just
|
* Note - adds the hyperlink at the end, not in the right place...
|
||||||
* at the end
|
*
|
||||||
|
* @deprecated Use {@link XWPFHyperlinkRun} instead
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public class XWPFHyperlinkDecorator extends XWPFParagraphDecorator {
|
public class XWPFHyperlinkDecorator extends XWPFParagraphDecorator {
|
||||||
private StringBuffer hyperlinkText;
|
private StringBuffer hyperlinkText;
|
||||||
|
|
||||||
|
@ -0,0 +1,64 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xwpf.usermodel;
|
||||||
|
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A run of text with a Hyperlink applied to it.
|
||||||
|
* Any given Hyperlink may be made up of multiple of these.
|
||||||
|
*/
|
||||||
|
public class XWPFHyperlinkRun extends XWPFRun
|
||||||
|
{
|
||||||
|
private CTHyperlink hyperlink;
|
||||||
|
|
||||||
|
public XWPFHyperlinkRun(CTHyperlink hyperlink, CTR run, XWPFParagraph p) {
|
||||||
|
super(run, p);
|
||||||
|
this.hyperlink = hyperlink;
|
||||||
|
}
|
||||||
|
|
||||||
|
public CTHyperlink getCTHyperlink() {
|
||||||
|
return hyperlink;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAnchor() {
|
||||||
|
return hyperlink.getAnchor();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the ID of the hyperlink, if one is set.
|
||||||
|
*/
|
||||||
|
public String getHyperlinkId() {
|
||||||
|
return hyperlink.getId();
|
||||||
|
}
|
||||||
|
public void setHyperlinkId(String id) {
|
||||||
|
hyperlink.setId(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this Hyperlink is an external reference hyperlink,
|
||||||
|
* return the object for it.
|
||||||
|
*/
|
||||||
|
public XWPFHyperlink getHyperlink(XWPFDocument document) {
|
||||||
|
String id = getHyperlinkId();
|
||||||
|
if(id == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return document.getHyperlinkByID(id);
|
||||||
|
}
|
||||||
|
}
|
@ -20,21 +20,19 @@ import java.math.BigInteger;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.poi.util.Internal;
|
import org.apache.poi.util.Internal;
|
||||||
import org.apache.xmlbeans.XmlCursor;
|
import org.apache.xmlbeans.XmlCursor;
|
||||||
import org.apache.xmlbeans.XmlObject;
|
import org.apache.xmlbeans.XmlObject;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
|
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTProofErr;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTProofErr;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||||
@ -66,10 +64,6 @@ public class XWPFParagraph implements IBodyElement{
|
|||||||
protected XWPFDocument document;
|
protected XWPFDocument document;
|
||||||
protected List<XWPFRun> runs;
|
protected List<XWPFRun> runs;
|
||||||
|
|
||||||
/**
|
|
||||||
* TODO - replace with RichText String
|
|
||||||
*/
|
|
||||||
private StringBuffer text = new StringBuffer();
|
|
||||||
private StringBuffer pictureText = new StringBuffer();
|
private StringBuffer pictureText = new StringBuffer();
|
||||||
private StringBuffer footnoteText = new StringBuffer();
|
private StringBuffer footnoteText = new StringBuffer();
|
||||||
|
|
||||||
@ -91,102 +85,76 @@ public class XWPFParagraph implements IBodyElement{
|
|||||||
}
|
}
|
||||||
|
|
||||||
runs = new ArrayList<XWPFRun>();
|
runs = new ArrayList<XWPFRun>();
|
||||||
if (prgrph.getRList().size() > 0) {
|
|
||||||
for(CTR ctRun : prgrph.getRList()) {
|
|
||||||
runs.add(new XWPFRun(ctRun, this));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isEmpty()) {
|
// Get all our child nodes in order, and process them
|
||||||
readNewText();
|
// into XWPFRuns where we can
|
||||||
}
|
XmlCursor c = paragraph.newCursor();
|
||||||
}
|
c.selectPath("child::*");
|
||||||
|
while (c.toNextSelection()) {
|
||||||
|
XmlObject o = c.getObject();
|
||||||
|
if(o instanceof CTR) {
|
||||||
|
runs.add(new XWPFRun((CTR)o, this));
|
||||||
|
}
|
||||||
|
if(o instanceof CTHyperlink) {
|
||||||
|
CTHyperlink link = (CTHyperlink)o;
|
||||||
|
for(CTR r : link.getRList()) {
|
||||||
|
runs.add(new XWPFHyperlinkRun(link, r, this));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(o instanceof CTSdtRun) {
|
||||||
|
CTSdtContentRun run = ((CTSdtRun)o).getSdtContent();
|
||||||
|
for(CTR r : run.getRList()) {
|
||||||
|
runs.add(new XWPFRun(r, this));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(o instanceof CTRunTrackChange) {
|
||||||
|
for(CTR r : ((CTRunTrackChange)o).getRList()) {
|
||||||
|
runs.add(new XWPFRun(r, this));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(o instanceof CTSimpleField) {
|
||||||
|
for(CTR r : ((CTSimpleField)o).getRList()) {
|
||||||
|
runs.add(new XWPFRun(r, this));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected String readNewText() {
|
// Look for bits associated with the runs
|
||||||
StringBuffer text = new StringBuffer();
|
for(XWPFRun run : runs) {
|
||||||
|
CTR r = run.getCTR();
|
||||||
|
|
||||||
// All the runs to loop over
|
// Check for bits that only apply when
|
||||||
// TODO - replace this with some sort of XPath expression
|
// attached to a core document
|
||||||
// to directly find all the CTRs, in the right order
|
if(document != null) {
|
||||||
ArrayList<CTR> rs = new ArrayList<CTR>();
|
c = r.newCursor();
|
||||||
rs.addAll( paragraph.getRList() );
|
c.selectPath("child::*");
|
||||||
|
while (c.toNextSelection()) {
|
||||||
|
XmlObject o = c.getObject();
|
||||||
|
if(o instanceof CTFtnEdnRef) {
|
||||||
|
CTFtnEdnRef ftn = (CTFtnEdnRef)o;
|
||||||
|
footnoteText.append("[").append(ftn.getId()).append(": ");
|
||||||
|
XWPFFootnote footnote =
|
||||||
|
ftn.getDomNode().getLocalName().equals("footnoteReference") ?
|
||||||
|
document.getFootnoteByID(ftn.getId().intValue()) :
|
||||||
|
document.getEndnoteByID(ftn.getId().intValue());
|
||||||
|
|
||||||
for (CTSdtRun sdt : paragraph.getSdtList()) {
|
boolean first = true;
|
||||||
CTSdtContentRun run = sdt.getSdtContent();
|
for (XWPFParagraph p : footnote.getParagraphs()) {
|
||||||
rs.addAll( run.getRList() );
|
if (!first) {
|
||||||
}
|
footnoteText.append("\n");
|
||||||
for (CTRunTrackChange c : paragraph.getDelList()) {
|
first = false;
|
||||||
rs.addAll( c.getRList() );
|
}
|
||||||
}
|
footnoteText.append(p.getText());
|
||||||
for (CTRunTrackChange c : paragraph.getInsList()) {
|
}
|
||||||
rs.addAll( c.getRList() );
|
|
||||||
}
|
|
||||||
for (CTSimpleField f : paragraph.getFldSimpleList()) {
|
|
||||||
rs.addAll( f.getRList() );
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get text of the paragraph
|
footnoteText.append("]");
|
||||||
for (int j = 0; j < rs.size(); j++) {
|
}
|
||||||
// Grab the text and tabs of the paragraph
|
}
|
||||||
// Do so in a way that preserves the ordering
|
|
||||||
XmlCursor c = rs.get(j).newCursor();
|
|
||||||
c.selectPath("./*");
|
|
||||||
while (c.toNextSelection()) {
|
|
||||||
XmlObject o = c.getObject();
|
|
||||||
if (o instanceof CTText) {
|
|
||||||
String tagName = o.getDomNode().getNodeName();
|
|
||||||
// Field Codes (w:instrText, defined in spec sec. 17.16.23)
|
|
||||||
// come up as instances of CTText, but we don't want them
|
|
||||||
// in the normal text output
|
|
||||||
if (!"w:instrText".equals(tagName)) {
|
|
||||||
text.append(((CTText) o).getStringValue());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (o instanceof CTPTab) {
|
|
||||||
text.append("\t");
|
|
||||||
}
|
|
||||||
if (o instanceof CTEmpty) {
|
|
||||||
// Some inline text elements get returned not as
|
|
||||||
// themselves, but as CTEmpty, owing to some odd
|
|
||||||
// definitions around line 5642 of the XSDs
|
|
||||||
String tagName = o.getDomNode().getNodeName();
|
|
||||||
if ("w:tab".equals(tagName)) {
|
|
||||||
text.append("\t");
|
|
||||||
}
|
|
||||||
if ("w:cr".equals(tagName)) {
|
|
||||||
text.append("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for bits that only apply when
|
|
||||||
// attached to a core document
|
|
||||||
if(document != null) {
|
|
||||||
//got a reference to a footnote
|
|
||||||
if (o instanceof CTFtnEdnRef) {
|
|
||||||
CTFtnEdnRef ftn = (CTFtnEdnRef) o;
|
|
||||||
footnoteText.append("[").append(ftn.getId()).append(": ");
|
|
||||||
XWPFFootnote footnote =
|
|
||||||
ftn.getDomNode().getLocalName().equals("footnoteReference") ?
|
|
||||||
document.getFootnoteByID(ftn.getId().intValue()) :
|
|
||||||
document.getEndnoteByID(ftn.getId().intValue());
|
|
||||||
|
|
||||||
boolean first = true;
|
|
||||||
for (XWPFParagraph p : footnote.getParagraphs()) {
|
|
||||||
if (!first) {
|
|
||||||
footnoteText.append("\n");
|
|
||||||
first = false;
|
|
||||||
}
|
|
||||||
footnoteText.append(p.getText());
|
|
||||||
}
|
|
||||||
|
|
||||||
footnoteText.append("]");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Loop over pictures inside our
|
// Loop over pictures inside our
|
||||||
// paragraph, looking for text in them
|
// paragraph, looking for text in them
|
||||||
for(CTPicture pict : rs.get(j).getPictList()) {
|
for(CTPicture pict : r.getPictList()) {
|
||||||
XmlObject[] t = pict
|
XmlObject[] t = pict
|
||||||
.selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
|
.selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
|
||||||
for (int m = 0; m < t.length; m++) {
|
for (int m = 0; m < t.length; m++) {
|
||||||
@ -200,9 +168,6 @@ public class XWPFParagraph implements IBodyElement{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.text = text;
|
|
||||||
return text.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Internal
|
@Internal
|
||||||
@ -228,7 +193,10 @@ public class XWPFParagraph implements IBodyElement{
|
|||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
StringBuffer out = new StringBuffer();
|
StringBuffer out = new StringBuffer();
|
||||||
out.append(text).append(footnoteText).append(pictureText);
|
for(XWPFRun run : runs) {
|
||||||
|
out.append(run.toString());
|
||||||
|
}
|
||||||
|
out.append(footnoteText).append(pictureText);
|
||||||
return out.toString();
|
return out.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -282,7 +250,11 @@ public class XWPFParagraph implements IBodyElement{
|
|||||||
* paragraph
|
* paragraph
|
||||||
*/
|
*/
|
||||||
public String getParagraphText() {
|
public String getParagraphText() {
|
||||||
return text.toString();
|
StringBuffer out = new StringBuffer();
|
||||||
|
for(XWPFRun run : runs) {
|
||||||
|
out.append(run.toString());
|
||||||
|
}
|
||||||
|
return out.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1143,9 +1115,6 @@ public class XWPFParagraph implements IBodyElement{
|
|||||||
pos = paragraph.getRList().size();
|
pos = paragraph.getRList().size();
|
||||||
paragraph.addNewR();
|
paragraph.addNewR();
|
||||||
paragraph.setRArray(pos, run);
|
paragraph.setRArray(pos, run);
|
||||||
for (CTText ctText: paragraph.getRArray(pos).getTList()) {
|
|
||||||
this.text.append(ctText.getStringValue());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -19,12 +19,15 @@ package org.apache.poi.xwpf.usermodel;
|
|||||||
import java.math.BigInteger;
|
import java.math.BigInteger;
|
||||||
|
|
||||||
import org.apache.poi.util.Internal;
|
import org.apache.poi.util.Internal;
|
||||||
|
import org.apache.xmlbeans.XmlObject;
|
||||||
import org.apache.xmlbeans.XmlString;
|
import org.apache.xmlbeans.XmlString;
|
||||||
import org.apache.xmlbeans.XmlCursor;
|
import org.apache.xmlbeans.XmlCursor;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFonts;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFonts;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHpsMeasure;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHpsMeasure;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSignedHpsMeasure;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSignedHpsMeasure;
|
||||||
@ -492,4 +495,45 @@ public class XWPFRun {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the string version of the text, with tabs and
|
||||||
|
* carriage returns in place of their xml equivalents.
|
||||||
|
*/
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer text = new StringBuffer();
|
||||||
|
|
||||||
|
// Grab the text and tabs of the text run
|
||||||
|
// Do so in a way that preserves the ordering
|
||||||
|
XmlCursor c = run.newCursor();
|
||||||
|
c.selectPath("./*");
|
||||||
|
while (c.toNextSelection()) {
|
||||||
|
XmlObject o = c.getObject();
|
||||||
|
if (o instanceof CTText) {
|
||||||
|
String tagName = o.getDomNode().getNodeName();
|
||||||
|
// Field Codes (w:instrText, defined in spec sec. 17.16.23)
|
||||||
|
// come up as instances of CTText, but we don't want them
|
||||||
|
// in the normal text output
|
||||||
|
if (!"w:instrText".equals(tagName)) {
|
||||||
|
text.append(((CTText) o).getStringValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (o instanceof CTPTab) {
|
||||||
|
text.append("\t");
|
||||||
|
}
|
||||||
|
if (o instanceof CTEmpty) {
|
||||||
|
// Some inline text elements get returned not as
|
||||||
|
// themselves, but as CTEmpty, owing to some odd
|
||||||
|
// definitions around line 5642 of the XSDs
|
||||||
|
String tagName = o.getDomNode().getNodeName();
|
||||||
|
if ("w:tab".equals(tagName)) {
|
||||||
|
text.append("\t");
|
||||||
|
}
|
||||||
|
if ("w:cr".equals(tagName)) {
|
||||||
|
text.append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return text.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -327,7 +327,7 @@ public class XWPFTableCell implements IBody {
|
|||||||
public String getText(){
|
public String getText(){
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
for (XWPFParagraph p : paragraphs) {
|
for (XWPFParagraph p : paragraphs) {
|
||||||
text.append(p.readNewText());
|
text.append(p.getText());
|
||||||
}
|
}
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
@ -96,22 +96,18 @@ public class TestXWPFWordExtractor extends TestCase {
|
|||||||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
|
||||||
|
|
||||||
// Now check contents
|
// Now check contents
|
||||||
// TODO - fix once correctly handling contents
|
|
||||||
extractor.setFetchHyperlinks(false);
|
extractor.setFetchHyperlinks(false);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
// "This is a test document\nThis bit is in bold and italic\n" +
|
"This is a test document\nThis bit is in bold and italic\n" +
|
||||||
// "Back to normal\nWe have a hyperlink here, and another.\n",
|
"Back to normal\nWe have a hyperlink here, and another.\n",
|
||||||
"This is a test document\nThis bit is in bold and italic\n" +
|
|
||||||
"Back to normal\nWe have a here, and .hyperlinkanother\n",
|
|
||||||
extractor.getText()
|
extractor.getText()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// One hyperlink is a real one, one is just to the top of page
|
||||||
extractor.setFetchHyperlinks(true);
|
extractor.setFetchHyperlinks(true);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
// "This is a test document\nThis bit is in bold and italic\n" +
|
"This is a test document\nThis bit is in bold and italic\n" +
|
||||||
// "Back to normal\nWe have a hyperlink here, and another.\n",
|
"Back to normal\nWe have a hyperlink <http://poi.apache.org/> here, and another.\n",
|
||||||
"This is a test document\nThis bit is in bold and italic\n" +
|
|
||||||
"Back to normal\nWe have a here, and .hyperlink <http://poi.apache.org/>another\n",
|
|
||||||
extractor.getText()
|
extractor.getText()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -144,7 +144,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
|
|||||||
policy = oddEven.getHeaderFooterPolicy();
|
policy = oddEven.getHeaderFooterPolicy();
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"[]ODD Page Header text\n\n",
|
"[ODD Page Header text]\n\n",
|
||||||
policy.getDefaultHeader().getText()
|
policy.getDefaultHeader().getText()
|
||||||
);
|
);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user