Patch from Fabian from bug #52285 - support Smart Tags in XWPF paragraphs, with test (and fixing indents)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1210774 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9068469566
commit
693a7ddc09
@ -34,6 +34,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.8-beta5" date="2011-??-??">
|
||||
<action dev="poi-developers" type="add">52285 - Support XWPF smart tags text in Paragraphs</action>
|
||||
<action dev="poi-developers" type="fix">51875 - More XSSF new-line in formula support</action>
|
||||
<action dev="poi-developers" type="add">POIFS EntryUtils.copyNodes(POFS,POIFS) now uses FilteringDirectoryNode, so can exclude from copying nodes not just directly under the root</action>
|
||||
<action dev="poi-developers" type="add">POIFS Helper FilteringDirectoryNode, which wraps a DirectoryEntry and allows certain parts to be ignored</action>
|
||||
|
@ -41,6 +41,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSmartTagRun;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||
@ -73,87 +74,95 @@ public class XWPFParagraph implements IBodyElement {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
|
||||
// Build up the character runs
|
||||
runs = new ArrayList<XWPFRun>();
|
||||
buildRunsInOrderFromXml(paragraph);
|
||||
|
||||
// Get all our child nodes in order, and process them
|
||||
// into XWPFRuns where we can
|
||||
XmlCursor c = paragraph.newCursor();
|
||||
c.selectPath("child::*");
|
||||
while (c.toNextSelection()) {
|
||||
XmlObject o = c.getObject();
|
||||
if(o instanceof CTR) {
|
||||
runs.add(new XWPFRun((CTR)o, this));
|
||||
}
|
||||
if(o instanceof CTHyperlink) {
|
||||
CTHyperlink link = (CTHyperlink)o;
|
||||
for(CTR r : link.getRList()) {
|
||||
runs.add(new XWPFHyperlinkRun(link, r, this));
|
||||
}
|
||||
}
|
||||
if(o instanceof CTSdtRun) {
|
||||
CTSdtContentRun run = ((CTSdtRun)o).getSdtContent();
|
||||
for(CTR r : run.getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
if(o instanceof CTRunTrackChange) {
|
||||
for(CTR r : ((CTRunTrackChange)o).getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
if(o instanceof CTSimpleField) {
|
||||
for(CTR r : ((CTSimpleField)o).getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Look for bits associated with the runs
|
||||
for(XWPFRun run : runs) {
|
||||
CTR r = run.getCTR();
|
||||
|
||||
c.dispose();
|
||||
|
||||
// Look for bits associated with the runs
|
||||
for(XWPFRun run : runs) {
|
||||
CTR r = run.getCTR();
|
||||
|
||||
// Check for bits that only apply when
|
||||
// attached to a core document
|
||||
// TODO Make this nicer by tracking the XWPFFootnotes directly
|
||||
if(document != null) {
|
||||
c = r.newCursor();
|
||||
c.selectPath("child::*");
|
||||
while (c.toNextSelection()) {
|
||||
// Check for bits that only apply when attached to a core document
|
||||
// TODO Make this nicer by tracking the XWPFFootnotes directly
|
||||
XmlCursor c = r.newCursor();
|
||||
c.selectPath("child::*");
|
||||
while (c.toNextSelection()) {
|
||||
XmlObject o = c.getObject();
|
||||
if(o instanceof CTFtnEdnRef) {
|
||||
CTFtnEdnRef ftn = (CTFtnEdnRef)o;
|
||||
footnoteText.append("[").append(ftn.getId()).append(": ");
|
||||
XWPFFootnote footnote =
|
||||
ftn.getDomNode().getLocalName().equals("footnoteReference") ?
|
||||
CTFtnEdnRef ftn = (CTFtnEdnRef)o;
|
||||
footnoteText.append("[").append(ftn.getId()).append(": ");
|
||||
XWPFFootnote footnote =
|
||||
ftn.getDomNode().getLocalName().equals("footnoteReference") ?
|
||||
document.getFootnoteByID(ftn.getId().intValue()) :
|
||||
document.getEndnoteByID(ftn.getId().intValue());
|
||||
|
||||
boolean first = true;
|
||||
for (XWPFParagraph p : footnote.getParagraphs()) {
|
||||
if (!first) {
|
||||
footnoteText.append("\n");
|
||||
first = false;
|
||||
}
|
||||
footnoteText.append(p.getText());
|
||||
}
|
||||
|
||||
footnoteText.append("]");
|
||||
|
||||
boolean first = true;
|
||||
for (XWPFParagraph p : footnote.getParagraphs()) {
|
||||
if (!first) {
|
||||
footnoteText.append("\n");
|
||||
first = false;
|
||||
}
|
||||
footnoteText.append(p.getText());
|
||||
}
|
||||
|
||||
footnoteText.append("]");
|
||||
}
|
||||
}
|
||||
c.dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
c.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Identifies (in order) the parts of the paragraph /
|
||||
* sub-paragraph that correspond to character text
|
||||
* runs, and builds the appropriate runs for these.
|
||||
*/
|
||||
private void buildRunsInOrderFromXml(XmlObject object) {
|
||||
XmlCursor c = object.newCursor();
|
||||
c.selectPath("child::*");
|
||||
while (c.toNextSelection()) {
|
||||
XmlObject o = c.getObject();
|
||||
if (o instanceof CTR) {
|
||||
runs.add(new XWPFRun((CTR) o, this));
|
||||
}
|
||||
if (o instanceof CTHyperlink) {
|
||||
CTHyperlink link = (CTHyperlink) o;
|
||||
for (CTR r : link.getRList()) {
|
||||
runs.add(new XWPFHyperlinkRun(link, r, this));
|
||||
}
|
||||
}
|
||||
if (o instanceof CTSdtRun) {
|
||||
CTSdtContentRun run = ((CTSdtRun) o).getSdtContent();
|
||||
for (CTR r : run.getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
if (o instanceof CTRunTrackChange) {
|
||||
for (CTR r : ((CTRunTrackChange) o).getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
if (o instanceof CTSimpleField) {
|
||||
for (CTR r : ((CTSimpleField) o).getRList()) {
|
||||
runs.add(new XWPFRun(r, this));
|
||||
}
|
||||
}
|
||||
if (o instanceof CTSmartTagRun) {
|
||||
// Smart Tags can be nested many times.
|
||||
// This implementation does not preserve the tagging information
|
||||
buildRunsInOrderFromXml(o);
|
||||
}
|
||||
}
|
||||
c.dispose();
|
||||
}
|
||||
|
||||
@Internal
|
||||
public CTP getCTP() {
|
||||
return paragraph;
|
||||
}
|
||||
|
||||
public List<XWPFRun> getRuns(){
|
||||
return Collections.unmodifiableList(runs);
|
||||
return Collections.unmodifiableList(runs);
|
||||
}
|
||||
|
||||
public boolean isEmpty(){
|
||||
@ -176,35 +185,35 @@ public class XWPFParagraph implements IBodyElement {
|
||||
out.append(footnoteText);
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return styleID of the paragraph if style exist for this paragraph
|
||||
* if not, null will be returned
|
||||
* @return styleID as String
|
||||
*/
|
||||
|
||||
/**
|
||||
* Return styleID of the paragraph if style exist for this paragraph
|
||||
* if not, null will be returned
|
||||
* @return styleID as String
|
||||
*/
|
||||
public String getStyleID(){
|
||||
if (paragraph.getPPr() != null){
|
||||
if(paragraph.getPPr().getPStyle()!= null){
|
||||
if (paragraph.getPPr().getPStyle().getVal()!= null)
|
||||
return paragraph.getPPr().getPStyle().getVal();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (paragraph.getPPr() != null){
|
||||
if(paragraph.getPPr().getPStyle()!= null){
|
||||
if (paragraph.getPPr().getPStyle().getVal()!= null)
|
||||
return paragraph.getPPr().getPStyle().getVal();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
/**
|
||||
* If style exist for this paragraph
|
||||
* NumId of the paragraph will be returned.
|
||||
* If style not exist null will be returned
|
||||
* @return NumID as BigInteger
|
||||
* If style not exist null will be returned
|
||||
* @return NumID as BigInteger
|
||||
*/
|
||||
public BigInteger getNumID(){
|
||||
if(paragraph.getPPr()!=null){
|
||||
if(paragraph.getPPr().getNumPr()!=null){
|
||||
if(paragraph.getPPr().getNumPr().getNumId()!=null)
|
||||
return paragraph.getPPr().getNumPr().getNumId().getVal();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
if(paragraph.getPPr()!=null){
|
||||
if(paragraph.getPPr().getNumPr()!=null){
|
||||
if(paragraph.getPPr().getNumPr().getNumId()!=null)
|
||||
return paragraph.getPPr().getNumPr().getNumId().getVal();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -212,14 +221,14 @@ public class XWPFParagraph implements IBodyElement {
|
||||
* @param numPos
|
||||
*/
|
||||
public void setNumID(BigInteger numPos) {
|
||||
if(paragraph.getPPr()==null)
|
||||
paragraph.addNewPPr();
|
||||
if(paragraph.getPPr().getNumPr()==null)
|
||||
paragraph.getPPr().addNewNumPr();
|
||||
if(paragraph.getPPr().getNumPr().getNumId()==null){
|
||||
paragraph.getPPr().getNumPr().addNewNumId();
|
||||
}
|
||||
paragraph.getPPr().getNumPr().getNumId().setVal(numPos);
|
||||
if(paragraph.getPPr()==null)
|
||||
paragraph.addNewPPr();
|
||||
if(paragraph.getPPr().getNumPr()==null)
|
||||
paragraph.getPPr().addNewNumPr();
|
||||
if(paragraph.getPPr().getNumPr().getNumId()==null){
|
||||
paragraph.getPPr().getNumPr().addNewNumId();
|
||||
}
|
||||
paragraph.getPPr().getNumPr().getNumId().setVal(numPos);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1027,18 +1036,18 @@ public class XWPFParagraph implements IBodyElement {
|
||||
* @param newStyle
|
||||
*/
|
||||
public void setStyle(String newStyle) {
|
||||
CTPPr pr = getCTPPr();
|
||||
CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle();
|
||||
style.setVal(newStyle);
|
||||
CTPPr pr = getCTPPr();
|
||||
CTString style = pr.getPStyle() != null ? pr.getPStyle() : pr.addNewPStyle();
|
||||
style.setVal(newStyle);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the style of the paragraph
|
||||
*/
|
||||
public String getStyle() {
|
||||
CTPPr pr = getCTPPr();
|
||||
CTString style = pr.isSetPStyle() ? pr.getPStyle() : null;
|
||||
return style != null ? style.getVal() : null;
|
||||
CTPPr pr = getCTPPr();
|
||||
CTString style = pr.isSetPStyle() ? pr.getPStyle() : null;
|
||||
return style != null ? style.getVal() : null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1094,10 +1103,10 @@ public class XWPFParagraph implements IBodyElement {
|
||||
* @param run
|
||||
*/
|
||||
protected void addRun(CTR run){
|
||||
int pos;
|
||||
pos = paragraph.getRList().size();
|
||||
paragraph.addNewR();
|
||||
paragraph.setRArray(pos, run);
|
||||
int pos;
|
||||
pos = paragraph.getRList().size();
|
||||
paragraph.addNewR();
|
||||
paragraph.setRArray(pos, run);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1108,65 +1117,65 @@ public class XWPFParagraph implements IBodyElement {
|
||||
* @param startPos
|
||||
*/
|
||||
public TextSegement searchText(String searched,PositionInParagraph startPos){
|
||||
|
||||
int startRun = startPos.getRun(),
|
||||
startText = startPos.getText(),
|
||||
startChar = startPos.getChar();
|
||||
int beginRunPos = 0, candCharPos = 0;
|
||||
boolean newList = false;
|
||||
for (int runPos=startRun; runPos<paragraph.getRList().size(); runPos++) {
|
||||
int beginTextPos = 0,beginCharPos = 0, textPos = 0, charPos = 0;
|
||||
CTR ctRun = paragraph.getRArray(runPos);
|
||||
XmlCursor c = ctRun.newCursor();
|
||||
c.selectPath("./*");
|
||||
while(c.toNextSelection()){
|
||||
XmlObject o = c.getObject();
|
||||
if(o instanceof CTText){
|
||||
if(textPos>=startText){
|
||||
String candidate = ((CTText)o).getStringValue();
|
||||
if(runPos==startRun)
|
||||
charPos= startChar;
|
||||
else
|
||||
charPos = 0;
|
||||
for(; charPos<candidate.length(); charPos++){
|
||||
if((candidate.charAt(charPos)==searched.charAt(0))&&(candCharPos==0)){
|
||||
beginTextPos = textPos;
|
||||
beginCharPos = charPos;
|
||||
beginRunPos = runPos;
|
||||
newList = true;
|
||||
}
|
||||
if(candidate.charAt(charPos)==searched.charAt(candCharPos)){
|
||||
if(candCharPos+1<searched.length())
|
||||
candCharPos++;
|
||||
else if(newList){
|
||||
TextSegement segement = new TextSegement();
|
||||
segement.setBeginRun(beginRunPos);
|
||||
segement.setBeginText(beginTextPos);
|
||||
segement.setBeginChar(beginCharPos);
|
||||
segement.setEndRun(runPos);
|
||||
segement.setEndText(textPos);
|
||||
segement.setEndChar(charPos);
|
||||
return segement;
|
||||
}
|
||||
}
|
||||
else
|
||||
candCharPos=0;
|
||||
}
|
||||
}
|
||||
textPos++;
|
||||
}
|
||||
else if(o instanceof CTProofErr){
|
||||
c.removeXml();
|
||||
}
|
||||
else if(o instanceof CTRPr);
|
||||
//do nothing
|
||||
else
|
||||
candCharPos=0;
|
||||
}
|
||||
|
||||
int startRun = startPos.getRun(),
|
||||
startText = startPos.getText(),
|
||||
startChar = startPos.getChar();
|
||||
int beginRunPos = 0, candCharPos = 0;
|
||||
boolean newList = false;
|
||||
for (int runPos=startRun; runPos<paragraph.getRList().size(); runPos++) {
|
||||
int beginTextPos = 0,beginCharPos = 0, textPos = 0, charPos = 0;
|
||||
CTR ctRun = paragraph.getRArray(runPos);
|
||||
XmlCursor c = ctRun.newCursor();
|
||||
c.selectPath("./*");
|
||||
while(c.toNextSelection()){
|
||||
XmlObject o = c.getObject();
|
||||
if(o instanceof CTText){
|
||||
if(textPos>=startText){
|
||||
String candidate = ((CTText)o).getStringValue();
|
||||
if(runPos==startRun)
|
||||
charPos= startChar;
|
||||
else
|
||||
charPos = 0;
|
||||
for(; charPos<candidate.length(); charPos++){
|
||||
if((candidate.charAt(charPos)==searched.charAt(0))&&(candCharPos==0)){
|
||||
beginTextPos = textPos;
|
||||
beginCharPos = charPos;
|
||||
beginRunPos = runPos;
|
||||
newList = true;
|
||||
}
|
||||
if(candidate.charAt(charPos)==searched.charAt(candCharPos)){
|
||||
if(candCharPos+1<searched.length())
|
||||
candCharPos++;
|
||||
else if(newList){
|
||||
TextSegement segement = new TextSegement();
|
||||
segement.setBeginRun(beginRunPos);
|
||||
segement.setBeginText(beginTextPos);
|
||||
segement.setBeginChar(beginCharPos);
|
||||
segement.setEndRun(runPos);
|
||||
segement.setEndText(textPos);
|
||||
segement.setEndChar(charPos);
|
||||
return segement;
|
||||
}
|
||||
}
|
||||
else
|
||||
candCharPos=0;
|
||||
}
|
||||
}
|
||||
textPos++;
|
||||
}
|
||||
else if(o instanceof CTProofErr){
|
||||
c.removeXml();
|
||||
}
|
||||
else if(o instanceof CTRPr);
|
||||
//do nothing
|
||||
else
|
||||
candCharPos=0;
|
||||
}
|
||||
|
||||
c.dispose();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1175,13 +1184,13 @@ public class XWPFParagraph implements IBodyElement {
|
||||
* @return the inserted run
|
||||
*/
|
||||
public XWPFRun insertNewRun(int pos){
|
||||
if (pos >= 0 && pos <= paragraph.sizeOfRArray()) {
|
||||
CTR ctRun = paragraph.insertNewR(pos);
|
||||
XWPFRun newRun = new XWPFRun(ctRun, this);
|
||||
runs.add(pos, newRun);
|
||||
return newRun;
|
||||
}
|
||||
return null;
|
||||
if (pos >= 0 && pos <= paragraph.sizeOfRArray()) {
|
||||
CTR ctRun = paragraph.insertNewR(pos);
|
||||
XWPFRun newRun = new XWPFRun(ctRun, this);
|
||||
runs.add(pos, newRun);
|
||||
return newRun;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@ -1196,27 +1205,27 @@ public class XWPFParagraph implements IBodyElement {
|
||||
int charBegin = segment.getBeginChar();
|
||||
int runEnd = segment.getEndRun();
|
||||
int textEnd = segment.getEndText();
|
||||
int charEnd = segment.getEndChar();
|
||||
int charEnd = segment.getEndChar();
|
||||
StringBuffer out = new StringBuffer();
|
||||
for(int i=runBegin; i<=runEnd;i++){
|
||||
int startText=0, endText = paragraph.getRArray(i).getTList().size()-1;
|
||||
if(i==runBegin)
|
||||
startText=textBegin;
|
||||
if(i==runEnd)
|
||||
endText = textEnd;
|
||||
for(int j=startText;j<=endText;j++){
|
||||
String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue();
|
||||
int startChar=0, endChar = tmpText.length()-1;
|
||||
if((j==textBegin)&&(i==runBegin))
|
||||
startChar=charBegin;
|
||||
if((j==textEnd)&&(i==runEnd)){
|
||||
endChar = charEnd;
|
||||
}
|
||||
out.append(tmpText.substring(startChar, endChar+1));
|
||||
|
||||
}
|
||||
}
|
||||
return out.toString();
|
||||
for(int i=runBegin; i<=runEnd;i++){
|
||||
int startText=0, endText = paragraph.getRArray(i).getTList().size()-1;
|
||||
if(i==runBegin)
|
||||
startText=textBegin;
|
||||
if(i==runEnd)
|
||||
endText = textEnd;
|
||||
for(int j=startText;j<=endText;j++){
|
||||
String tmpText = paragraph.getRArray(i).getTArray(j).getStringValue();
|
||||
int startChar=0, endChar = tmpText.length()-1;
|
||||
if((j==textBegin)&&(i==runBegin))
|
||||
startChar=charBegin;
|
||||
if((j==textEnd)&&(i==runEnd)){
|
||||
endChar = charEnd;
|
||||
}
|
||||
out.append(tmpText.substring(startChar, endChar+1));
|
||||
|
||||
}
|
||||
}
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1225,12 +1234,12 @@ public class XWPFParagraph implements IBodyElement {
|
||||
* @return true if the run was removed
|
||||
*/
|
||||
public boolean removeRun(int pos){
|
||||
if (pos >= 0 && pos < paragraph.sizeOfRArray()){
|
||||
getCTP().removeR(pos);
|
||||
runs.remove(pos);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
if (pos >= 0 && pos < paragraph.sizeOfRArray()){
|
||||
getCTP().removeR(pos);
|
||||
runs.remove(pos);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -0,0 +1,39 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xwpf.usermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.xwpf.XWPFTestDataSamples;
|
||||
|
||||
/**
|
||||
* Tests for reading SmartTags from Word docx.
|
||||
*
|
||||
* @author Fabian Lange
|
||||
*/
|
||||
public final class TestXWPFSmartTag extends TestCase {
|
||||
|
||||
public void testSmartTags() throws IOException {
|
||||
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("smarttag-snippet.docx");
|
||||
XWPFParagraph p = doc.getParagraphArray(0);
|
||||
assertTrue(p.getText().contains("Carnegie Mellon University School of Computer Science"));
|
||||
p = doc.getParagraphArray(2);
|
||||
assertTrue(p.getText().contains("Alice's Adventures"));
|
||||
}
|
||||
}
|
BIN
test-data/document/smarttag-snippet.docx
Normal file
BIN
test-data/document/smarttag-snippet.docx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user