use StringBuilder in more places
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1813869 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bf5a0c4c9e
commit
34006a542e
File diff suppressed because it is too large
Load Diff
@ -30,211 +30,206 @@ import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProper
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link POITextExtractor} for returning the textual
|
* A {@link POITextExtractor} for returning the textual
|
||||||
* content of the OOXML file properties, eg author
|
* content of the OOXML file properties, eg author
|
||||||
* and title.
|
* and title.
|
||||||
*/
|
*/
|
||||||
public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
|
public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
|
||||||
|
|
||||||
private final DateFormat dateFormat;
|
private final DateFormat dateFormat;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new POIXMLPropertiesTextExtractor for the given open document.
|
* Creates a new POIXMLPropertiesTextExtractor for the given open document.
|
||||||
*
|
*
|
||||||
* @param doc the given open document
|
* @param doc the given open document
|
||||||
*/
|
*/
|
||||||
public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
|
public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
|
||||||
super(doc);
|
super(doc);
|
||||||
DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT);
|
DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT);
|
||||||
dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs);
|
dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs);
|
||||||
dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
|
dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a new POIXMLPropertiesTextExtractor, for the
|
|
||||||
* same file that another TextExtractor is already
|
|
||||||
* working on.
|
|
||||||
*
|
|
||||||
* @param otherExtractor the extractor referencing the given file
|
|
||||||
*/
|
|
||||||
public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
|
|
||||||
this(otherExtractor.getDocument());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void appendIfPresent(StringBuffer text, String thing, boolean value) {
|
|
||||||
appendIfPresent(text, thing, Boolean.toString(value));
|
|
||||||
}
|
|
||||||
private void appendIfPresent(StringBuffer text, String thing, int value) {
|
|
||||||
appendIfPresent(text, thing, Integer.toString(value));
|
|
||||||
}
|
|
||||||
private void appendIfPresent(StringBuffer text, String thing, Date value) {
|
|
||||||
if(value == null) { return; }
|
|
||||||
appendIfPresent(text, thing, dateFormat.format(value));
|
|
||||||
}
|
|
||||||
private void appendIfPresent(StringBuffer text, String thing, String value) {
|
|
||||||
if(value == null) { return; }
|
|
||||||
text.append(thing);
|
|
||||||
text.append(" = ");
|
|
||||||
text.append(value);
|
|
||||||
text.append("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the core document properties, eg author
|
* Creates a new POIXMLPropertiesTextExtractor, for the
|
||||||
*
|
* same file that another TextExtractor is already
|
||||||
* @return the core document properties
|
* working on.
|
||||||
*/
|
*
|
||||||
@SuppressWarnings("resource")
|
* @param otherExtractor the extractor referencing the given file
|
||||||
|
*/
|
||||||
|
public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
|
||||||
|
this(otherExtractor.getDocument());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void appendIfPresent(StringBuilder text, String thing, boolean value) {
|
||||||
|
appendIfPresent(text, thing, Boolean.toString(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void appendIfPresent(StringBuilder text, String thing, int value) {
|
||||||
|
appendIfPresent(text, thing, Integer.toString(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void appendIfPresent(StringBuilder text, String thing, Date value) {
|
||||||
|
if (value == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
appendIfPresent(text, thing, dateFormat.format(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void appendIfPresent(StringBuilder text, String thing, String value) {
|
||||||
|
if (value == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
text.append(thing);
|
||||||
|
text.append(" = ");
|
||||||
|
text.append(value);
|
||||||
|
text.append("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the core document properties, eg author
|
||||||
|
*
|
||||||
|
* @return the core document properties
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("resource")
|
||||||
public String getCorePropertiesText() {
|
public String getCorePropertiesText() {
|
||||||
POIXMLDocument document = getDocument();
|
POIXMLDocument document = getDocument();
|
||||||
if(document == null) { // event based extractor does not have a document
|
if (document == null) { // event based extractor does not have a document
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuilder text = new StringBuilder(64);
|
||||||
PackagePropertiesPart props =
|
PackagePropertiesPart props =
|
||||||
document.getProperties().getCoreProperties().getUnderlyingProperties();
|
document.getProperties().getCoreProperties().getUnderlyingProperties();
|
||||||
|
|
||||||
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
|
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
|
||||||
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
|
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
|
||||||
appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue());
|
appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue());
|
||||||
appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue());
|
appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue());
|
||||||
appendIfPresent(text, "Created", props.getCreatedProperty().getValue());
|
appendIfPresent(text, "Created", props.getCreatedProperty().getValue());
|
||||||
appendIfPresent(text, "CreatedString", props.getCreatedPropertyString());
|
appendIfPresent(text, "CreatedString", props.getCreatedPropertyString());
|
||||||
appendIfPresent(text, "Creator", props.getCreatorProperty().getValue());
|
appendIfPresent(text, "Creator", props.getCreatorProperty().getValue());
|
||||||
appendIfPresent(text, "Description", props.getDescriptionProperty().getValue());
|
appendIfPresent(text, "Description", props.getDescriptionProperty().getValue());
|
||||||
appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue());
|
appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue());
|
||||||
appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue());
|
appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue());
|
||||||
appendIfPresent(text, "Language", props.getLanguageProperty().getValue());
|
appendIfPresent(text, "Language", props.getLanguageProperty().getValue());
|
||||||
appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
|
appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
|
||||||
appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue());
|
appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue());
|
||||||
appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString());
|
appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString());
|
||||||
appendIfPresent(text, "Modified", props.getModifiedProperty().getValue());
|
appendIfPresent(text, "Modified", props.getModifiedProperty().getValue());
|
||||||
appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString());
|
appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString());
|
||||||
appendIfPresent(text, "Revision", props.getRevisionProperty().getValue());
|
appendIfPresent(text, "Revision", props.getRevisionProperty().getValue());
|
||||||
appendIfPresent(text, "Subject", props.getSubjectProperty().getValue());
|
appendIfPresent(text, "Subject", props.getSubjectProperty().getValue());
|
||||||
appendIfPresent(text, "Title", props.getTitleProperty().getValue());
|
appendIfPresent(text, "Title", props.getTitleProperty().getValue());
|
||||||
appendIfPresent(text, "Version", props.getVersionProperty().getValue());
|
appendIfPresent(text, "Version", props.getVersionProperty().getValue());
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Returns the extended document properties, eg application
|
/**
|
||||||
*
|
* Returns the extended document properties, eg application
|
||||||
* @return the extended document properties
|
*
|
||||||
*/
|
* @return the extended document properties
|
||||||
@SuppressWarnings("resource")
|
*/
|
||||||
|
@SuppressWarnings("resource")
|
||||||
public String getExtendedPropertiesText() {
|
public String getExtendedPropertiesText() {
|
||||||
POIXMLDocument document = getDocument();
|
POIXMLDocument document = getDocument();
|
||||||
if(document == null) { // event based extractor does not have a document
|
if (document == null) { // event based extractor does not have a document
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuilder text = new StringBuilder(64);
|
||||||
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
|
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
|
||||||
props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
|
props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
|
||||||
|
|
||||||
appendIfPresent(text, "Application", props.getApplication());
|
appendIfPresent(text, "Application", props.getApplication());
|
||||||
appendIfPresent(text, "AppVersion", props.getAppVersion());
|
appendIfPresent(text, "AppVersion", props.getAppVersion());
|
||||||
appendIfPresent(text, "Characters", props.getCharacters());
|
appendIfPresent(text, "Characters", props.getCharacters());
|
||||||
appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces());
|
appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces());
|
||||||
appendIfPresent(text, "Company", props.getCompany());
|
appendIfPresent(text, "Company", props.getCompany());
|
||||||
appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase());
|
appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase());
|
||||||
appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged());
|
appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged());
|
||||||
appendIfPresent(text, "Lines", props.getLines());
|
appendIfPresent(text, "Lines", props.getLines());
|
||||||
appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate());
|
appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate());
|
||||||
appendIfPresent(text, "Manager", props.getManager());
|
appendIfPresent(text, "Manager", props.getManager());
|
||||||
appendIfPresent(text, "Pages", props.getPages());
|
appendIfPresent(text, "Pages", props.getPages());
|
||||||
appendIfPresent(text, "Paragraphs", props.getParagraphs());
|
appendIfPresent(text, "Paragraphs", props.getParagraphs());
|
||||||
appendIfPresent(text, "PresentationFormat", props.getPresentationFormat());
|
appendIfPresent(text, "PresentationFormat", props.getPresentationFormat());
|
||||||
appendIfPresent(text, "Template", props.getTemplate());
|
appendIfPresent(text, "Template", props.getTemplate());
|
||||||
appendIfPresent(text, "TotalTime", props.getTotalTime());
|
appendIfPresent(text, "TotalTime", props.getTotalTime());
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Returns the custom document properties, if there are any
|
|
||||||
*
|
|
||||||
* @return the custom document properties
|
|
||||||
*/
|
|
||||||
@SuppressWarnings({ "resource" })
|
|
||||||
public String getCustomPropertiesText() {
|
|
||||||
POIXMLDocument document = getDocument();
|
|
||||||
if(document == null) { // event based extractor does not have a document
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
StringBuilder text = new StringBuilder();
|
/**
|
||||||
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
|
* Returns the custom document properties, if there are any
|
||||||
props = document.getProperties().getCustomProperties().getUnderlyingProperties();
|
*
|
||||||
|
* @return the custom document properties
|
||||||
|
*/
|
||||||
|
@SuppressWarnings({"resource"})
|
||||||
|
public String getCustomPropertiesText() {
|
||||||
|
POIXMLDocument document = getDocument();
|
||||||
|
if (document == null) { // event based extractor does not have a document
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
for(CTProperty property : props.getPropertyArray()) {
|
StringBuilder text = new StringBuilder();
|
||||||
String val = "(not implemented!)";
|
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
|
||||||
|
props = document.getProperties().getCustomProperties().getUnderlyingProperties();
|
||||||
|
|
||||||
if (property.isSetLpwstr()) {
|
for (CTProperty property : props.getPropertyArray()) {
|
||||||
val = property.getLpwstr();
|
String val = "(not implemented!)";
|
||||||
}
|
|
||||||
else if (property.isSetLpstr()) {
|
|
||||||
val = property.getLpstr();
|
|
||||||
}
|
|
||||||
else if (property.isSetDate()) {
|
|
||||||
val = property.getDate().toString();
|
|
||||||
}
|
|
||||||
else if (property.isSetFiletime()) {
|
|
||||||
val = property.getFiletime().toString();
|
|
||||||
}
|
|
||||||
else if (property.isSetBool()) {
|
|
||||||
val = Boolean.toString( property.getBool() );
|
|
||||||
}
|
|
||||||
|
|
||||||
// Integers
|
if (property.isSetLpwstr()) {
|
||||||
else if (property.isSetI1()) {
|
val = property.getLpwstr();
|
||||||
val = Integer.toString(property.getI1());
|
} else if (property.isSetLpstr()) {
|
||||||
}
|
val = property.getLpstr();
|
||||||
else if (property.isSetI2()) {
|
} else if (property.isSetDate()) {
|
||||||
val = Integer.toString(property.getI2());
|
val = property.getDate().toString();
|
||||||
}
|
} else if (property.isSetFiletime()) {
|
||||||
else if (property.isSetI4()) {
|
val = property.getFiletime().toString();
|
||||||
val = Integer.toString(property.getI4());
|
} else if (property.isSetBool()) {
|
||||||
}
|
val = Boolean.toString(property.getBool());
|
||||||
else if (property.isSetI8()) {
|
}
|
||||||
val = Long.toString(property.getI8());
|
|
||||||
}
|
// Integers
|
||||||
else if (property.isSetInt()) {
|
else if (property.isSetI1()) {
|
||||||
val = Integer.toString( property.getInt() );
|
val = Integer.toString(property.getI1());
|
||||||
}
|
} else if (property.isSetI2()) {
|
||||||
|
val = Integer.toString(property.getI2());
|
||||||
// Unsigned Integers
|
} else if (property.isSetI4()) {
|
||||||
else if (property.isSetUi1()) {
|
val = Integer.toString(property.getI4());
|
||||||
val = Integer.toString(property.getUi1());
|
} else if (property.isSetI8()) {
|
||||||
}
|
val = Long.toString(property.getI8());
|
||||||
else if (property.isSetUi2()) {
|
} else if (property.isSetInt()) {
|
||||||
val = Integer.toString(property.getUi2());
|
val = Integer.toString(property.getInt());
|
||||||
}
|
}
|
||||||
else if (property.isSetUi4()) {
|
|
||||||
val = Long.toString(property.getUi4());
|
// Unsigned Integers
|
||||||
}
|
else if (property.isSetUi1()) {
|
||||||
else if (property.isSetUi8()) {
|
val = Integer.toString(property.getUi1());
|
||||||
val = property.getUi8().toString();
|
} else if (property.isSetUi2()) {
|
||||||
}
|
val = Integer.toString(property.getUi2());
|
||||||
else if (property.isSetUint()) {
|
} else if (property.isSetUi4()) {
|
||||||
val = Long.toString(property.getUint());
|
val = Long.toString(property.getUi4());
|
||||||
}
|
} else if (property.isSetUi8()) {
|
||||||
|
val = property.getUi8().toString();
|
||||||
// Reals
|
} else if (property.isSetUint()) {
|
||||||
else if (property.isSetR4()) {
|
val = Long.toString(property.getUint());
|
||||||
val = Float.toString( property.getR4() );
|
}
|
||||||
}
|
|
||||||
else if (property.isSetR8()) {
|
// Reals
|
||||||
val = Double.toString( property.getR8() );
|
else if (property.isSetR4()) {
|
||||||
}
|
val = Float.toString(property.getR4());
|
||||||
else if (property.isSetDecimal()) {
|
} else if (property.isSetR8()) {
|
||||||
BigDecimal d = property.getDecimal();
|
val = Double.toString(property.getR8());
|
||||||
if (d == null) {
|
} else if (property.isSetDecimal()) {
|
||||||
val = null;
|
BigDecimal d = property.getDecimal();
|
||||||
} else {
|
if (d == null) {
|
||||||
val = d.toPlainString();
|
val = null;
|
||||||
|
} else {
|
||||||
|
val = d.toPlainString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*else if (property.isSetArray()) {
|
/*else if (property.isSetArray()) {
|
||||||
// TODO Fetch the array values and output
|
// TODO Fetch the array values and output
|
||||||
@ -254,26 +249,26 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
|
|||||||
// TODO Decode, if possible
|
// TODO Decode, if possible
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
text.append(property.getName()).append(" = ").append(val).append("\n");
|
text.append(property.getName()).append(" = ").append(val).append("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getText() {
|
public String getText() {
|
||||||
try {
|
try {
|
||||||
return
|
return
|
||||||
getCorePropertiesText() +
|
getCorePropertiesText() +
|
||||||
getExtendedPropertiesText() +
|
getExtendedPropertiesText() +
|
||||||
getCustomPropertiesText();
|
getCustomPropertiesText();
|
||||||
} catch(Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
|
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
|
||||||
throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
|
throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -601,7 +601,7 @@ public final class PackagingURIHelper {
|
|||||||
* characters.
|
* characters.
|
||||||
*/
|
*/
|
||||||
public static String decodeURI(URI uri) {
|
public static String decodeURI(URI uri) {
|
||||||
StringBuffer retVal = new StringBuffer();
|
StringBuilder retVal = new StringBuilder(64);
|
||||||
String uriStr = uri.toASCIIString();
|
String uriStr = uri.toASCIIString();
|
||||||
char c;
|
char c;
|
||||||
final int length = uriStr.length();
|
final int length = uriStr.length();
|
||||||
|
@ -188,16 +188,16 @@ public final class ContentType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public final String toString(boolean withParameters) {
|
public final String toString(boolean withParameters) {
|
||||||
StringBuffer retVal = new StringBuffer();
|
StringBuilder retVal = new StringBuilder(64);
|
||||||
retVal.append(this.getType());
|
retVal.append(this.getType());
|
||||||
retVal.append("/");
|
retVal.append('/');
|
||||||
retVal.append(this.getSubType());
|
retVal.append(this.getSubType());
|
||||||
|
|
||||||
if (withParameters) {
|
if (withParameters) {
|
||||||
for (Entry<String, String> me : parameters.entrySet()) {
|
for (Entry<String, String> me : parameters.entrySet()) {
|
||||||
retVal.append(";");
|
retVal.append(';');
|
||||||
retVal.append(me.getKey());
|
retVal.append(me.getKey());
|
||||||
retVal.append("=");
|
retVal.append('=');
|
||||||
retVal.append(me.getValue());
|
retVal.append(me.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -223,7 +223,7 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
|
|||||||
|
|
||||||
//// ContentHandler methods ////
|
//// ContentHandler methods ////
|
||||||
|
|
||||||
private StringBuffer characters;
|
private StringBuilder characters;
|
||||||
private boolean tIsOpen;
|
private boolean tIsOpen;
|
||||||
private boolean inRPh;
|
private boolean inRPh;
|
||||||
|
|
||||||
@ -241,7 +241,7 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
|
|||||||
|
|
||||||
this.strings = new ArrayList<>(this.uniqueCount);
|
this.strings = new ArrayList<>(this.uniqueCount);
|
||||||
this.phoneticStrings = new HashMap<>();
|
this.phoneticStrings = new HashMap<>();
|
||||||
characters = new StringBuffer();
|
characters = new StringBuilder(64);
|
||||||
} else if ("si".equals(localName)) {
|
} else if ("si".equals(localName)) {
|
||||||
characters.setLength(0);
|
characters.setLength(0);
|
||||||
} else if ("t".equals(localName)) {
|
} else if ("t".equals(localName)) {
|
||||||
|
@ -102,9 +102,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
|||||||
private boolean formulasNotResults;
|
private boolean formulasNotResults;
|
||||||
|
|
||||||
// Gathers characters as they are seen.
|
// Gathers characters as they are seen.
|
||||||
private StringBuffer value = new StringBuffer();
|
private StringBuilder value = new StringBuilder(64);
|
||||||
private StringBuffer formula = new StringBuffer();
|
private StringBuilder formula = new StringBuilder(64);
|
||||||
private StringBuffer headerFooter = new StringBuffer();
|
private StringBuilder headerFooter = new StringBuilder(64);
|
||||||
|
|
||||||
private Queue<CellAddress> commentCellRefs;
|
private Queue<CellAddress> commentCellRefs;
|
||||||
|
|
||||||
|
@ -122,7 +122,7 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
|
|||||||
XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
|
XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
|
||||||
XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
|
XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuilder text = new StringBuilder(64);
|
||||||
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
|
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
|
||||||
XSSFBHyperlinksTable hyperlinksTable = null;
|
XSSFBHyperlinksTable hyperlinksTable = null;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
|
@ -52,10 +52,10 @@ import org.xml.sax.XMLReader;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Implementation of a text extractor from OOXML Excel
|
* Implementation of a text extractor from OOXML Excel
|
||||||
* files that uses SAX event based parsing.
|
* files that uses SAX event based parsing.
|
||||||
*/
|
*/
|
||||||
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
implements org.apache.poi.ss.extractor.ExcelExtractor {
|
implements org.apache.poi.ss.extractor.ExcelExtractor {
|
||||||
|
|
||||||
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFEventBasedExcelExtractor.class);
|
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFEventBasedExcelExtractor.class);
|
||||||
|
|
||||||
@ -73,6 +73,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(OPCPackage.open(path));
|
this(OPCPackage.open(path));
|
||||||
}
|
}
|
||||||
|
|
||||||
public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
|
public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
|
||||||
super(null);
|
super(null);
|
||||||
this.container = container;
|
this.container = container;
|
||||||
@ -81,13 +82,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
if(args.length < 1) {
|
if (args.length < 1) {
|
||||||
System.err.println("Use:");
|
System.err.println("Use:");
|
||||||
System.err.println(" XSSFEventBasedExcelExtractor <filename.xlsx>");
|
System.err.println(" XSSFEventBasedExcelExtractor <filename.xlsx>");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
POIXMLTextExtractor extractor =
|
POIXMLTextExtractor extractor =
|
||||||
new XSSFEventBasedExcelExtractor(args[0]);
|
new XSSFEventBasedExcelExtractor(args[0]);
|
||||||
System.out.println(extractor.getText());
|
System.out.println(extractor.getText());
|
||||||
extractor.close();
|
extractor.close();
|
||||||
}
|
}
|
||||||
@ -101,9 +102,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @return whether to include sheet names
|
* @return whether to include sheet names
|
||||||
*
|
|
||||||
* @since 3.16-beta3
|
* @since 3.16-beta3
|
||||||
*/
|
*/
|
||||||
public boolean getIncludeSheetNames() {
|
public boolean getIncludeSheetNames() {
|
||||||
@ -112,16 +111,14 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Should we return the formula itself, and not
|
* Should we return the formula itself, and not
|
||||||
* the result it produces? Default is false
|
* the result it produces? Default is false
|
||||||
*/
|
*/
|
||||||
public void setFormulasNotResults(boolean formulasNotResults) {
|
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||||
this.formulasNotResults = formulasNotResults;
|
this.formulasNotResults = formulasNotResults;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @return whether to include formulas but not results
|
* @return whether to include formulas but not results
|
||||||
*
|
|
||||||
* @since 3.16-beta3
|
* @since 3.16-beta3
|
||||||
*/
|
*/
|
||||||
public boolean getFormulasNotResults() {
|
public boolean getFormulasNotResults() {
|
||||||
@ -136,14 +133,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @return whether or not to include headers and footers
|
* @return whether or not to include headers and footers
|
||||||
*
|
|
||||||
* @since 3.16-beta3
|
* @since 3.16-beta3
|
||||||
*/
|
*/
|
||||||
public boolean getIncludeHeadersFooters() {
|
public boolean getIncludeHeadersFooters() {
|
||||||
return includeHeadersFooters;
|
return includeHeadersFooters;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should text from textboxes be included? Default is true
|
* Should text from textboxes be included? Default is true
|
||||||
*/
|
*/
|
||||||
@ -152,14 +148,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @return whether or not to extract textboxes
|
* @return whether or not to extract textboxes
|
||||||
*
|
|
||||||
* @since 3.16-beta3
|
* @since 3.16-beta3
|
||||||
*/
|
*/
|
||||||
public boolean getIncludeTextBoxes() {
|
public boolean getIncludeTextBoxes() {
|
||||||
return includeTextBoxes;
|
return includeTextBoxes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should cell comments be included? Default is false
|
* Should cell comments be included? Default is false
|
||||||
*/
|
*/
|
||||||
@ -169,32 +164,34 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @return whether cell comments should be included
|
* @return whether cell comments should be included
|
||||||
*
|
|
||||||
* @since 3.16-beta3
|
* @since 3.16-beta3
|
||||||
*/
|
*/
|
||||||
public boolean getIncludeCellComments() {
|
public boolean getIncludeCellComments() {
|
||||||
return includeCellComments;
|
return includeCellComments;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Concatenate text from <rPh> text elements in SharedStringsTable
|
* Concatenate text from <rPh> text elements in SharedStringsTable
|
||||||
* Default is true;
|
* Default is true;
|
||||||
|
*
|
||||||
* @param concatenatePhoneticRuns true if runs should be concatenated, false otherwise
|
* @param concatenatePhoneticRuns true if runs should be concatenated, false otherwise
|
||||||
*/
|
*/
|
||||||
public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
|
public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
|
||||||
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
|
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLocale(Locale locale) {
|
public void setLocale(Locale locale) {
|
||||||
this.locale = locale;
|
this.locale = locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return locale
|
* @return locale
|
||||||
*
|
|
||||||
* @since 3.16-beta3
|
* @since 3.16-beta3
|
||||||
*/
|
*/
|
||||||
public Locale getLocale() {
|
public Locale getLocale() {
|
||||||
return locale;
|
return locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the opened OPCPackage container.
|
* Returns the opened OPCPackage container.
|
||||||
*/
|
*/
|
||||||
@ -210,6 +207,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
public CoreProperties getCoreProperties() {
|
public CoreProperties getCoreProperties() {
|
||||||
return properties.getCoreProperties();
|
return properties.getCoreProperties();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the extended document properties
|
* Returns the extended document properties
|
||||||
*/
|
*/
|
||||||
@ -217,6 +215,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
public ExtendedProperties getExtendedProperties() {
|
public ExtendedProperties getExtendedProperties() {
|
||||||
return properties.getExtendedProperties();
|
return properties.getExtendedProperties();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the custom document properties
|
* Returns the custom document properties
|
||||||
*/
|
*/
|
||||||
@ -226,7 +225,6 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Processes the given sheet
|
* Processes the given sheet
|
||||||
*/
|
*/
|
||||||
@ -238,96 +236,95 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
InputStream sheetInputStream)
|
InputStream sheetInputStream)
|
||||||
throws IOException, SAXException {
|
throws IOException, SAXException {
|
||||||
|
|
||||||
DataFormatter formatter;
|
DataFormatter formatter;
|
||||||
if(locale == null) {
|
if (locale == null) {
|
||||||
formatter = new DataFormatter();
|
formatter = new DataFormatter();
|
||||||
} else {
|
} else {
|
||||||
formatter = new DataFormatter(locale);
|
formatter = new DataFormatter(locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
InputSource sheetSource = new InputSource(sheetInputStream);
|
|
||||||
try {
|
|
||||||
XMLReader sheetParser = SAXHelper.newXMLReader();
|
|
||||||
ContentHandler handler = new XSSFSheetXMLHandler(
|
|
||||||
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
|
|
||||||
sheetParser.setContentHandler(handler);
|
|
||||||
sheetParser.parse(sheetSource);
|
|
||||||
} catch(ParserConfigurationException e) {
|
|
||||||
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
InputSource sheetSource = new InputSource(sheetInputStream);
|
||||||
* Processes the file and returns the text
|
try {
|
||||||
*/
|
XMLReader sheetParser = SAXHelper.newXMLReader();
|
||||||
public String getText() {
|
ContentHandler handler = new XSSFSheetXMLHandler(
|
||||||
try {
|
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
|
||||||
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
|
sheetParser.setContentHandler(handler);
|
||||||
XSSFReader xssfReader = new XSSFReader(container);
|
sheetParser.parse(sheetSource);
|
||||||
StylesTable styles = xssfReader.getStylesTable();
|
} catch (ParserConfigurationException e) {
|
||||||
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
|
||||||
|
}
|
||||||
StringBuffer text = new StringBuffer();
|
}
|
||||||
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
|
|
||||||
|
/**
|
||||||
while (iter.hasNext()) {
|
* Processes the file and returns the text
|
||||||
InputStream stream = iter.next();
|
*/
|
||||||
if(includeSheetNames) {
|
public String getText() {
|
||||||
text.append(iter.getSheetName());
|
try {
|
||||||
text.append('\n');
|
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
|
||||||
}
|
XSSFReader xssfReader = new XSSFReader(container);
|
||||||
CommentsTable comments = includeCellComments ? iter.getSheetComments() : null;
|
StylesTable styles = xssfReader.getStylesTable();
|
||||||
processSheet(sheetExtractor, styles, comments, strings, stream);
|
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
||||||
if (includeHeadersFooters) {
|
StringBuilder text = new StringBuilder(64);
|
||||||
sheetExtractor.appendHeaderText(text);
|
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
|
||||||
}
|
|
||||||
sheetExtractor.appendCellText(text);
|
while (iter.hasNext()) {
|
||||||
if (includeTextBoxes){
|
InputStream stream = iter.next();
|
||||||
processShapes(iter.getShapes(), text);
|
if (includeSheetNames) {
|
||||||
}
|
text.append(iter.getSheetName());
|
||||||
if (includeHeadersFooters) {
|
text.append('\n');
|
||||||
sheetExtractor.appendFooterText(text);
|
}
|
||||||
}
|
CommentsTable comments = includeCellComments ? iter.getSheetComments() : null;
|
||||||
sheetExtractor.reset();
|
processSheet(sheetExtractor, styles, comments, strings, stream);
|
||||||
stream.close();
|
if (includeHeadersFooters) {
|
||||||
}
|
sheetExtractor.appendHeaderText(text);
|
||||||
|
}
|
||||||
return text.toString();
|
sheetExtractor.appendCellText(text);
|
||||||
} catch(IOException | OpenXML4JException | SAXException e) {
|
if (includeTextBoxes) {
|
||||||
LOGGER.log(POILogger.WARN, e);
|
processShapes(iter.getShapes(), text);
|
||||||
return null;
|
}
|
||||||
}
|
if (includeHeadersFooters) {
|
||||||
}
|
sheetExtractor.appendFooterText(text);
|
||||||
|
}
|
||||||
void processShapes(List<XSSFShape> shapes, StringBuffer text) {
|
sheetExtractor.reset();
|
||||||
if (shapes == null){
|
stream.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
return text.toString();
|
||||||
|
} catch (IOException | OpenXML4JException | SAXException e) {
|
||||||
|
LOGGER.log(POILogger.WARN, e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void processShapes(List<XSSFShape> shapes, StringBuilder text) {
|
||||||
|
if (shapes == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (XSSFShape shape : shapes){
|
for (XSSFShape shape : shapes) {
|
||||||
if (shape instanceof XSSFSimpleShape){
|
if (shape instanceof XSSFSimpleShape) {
|
||||||
String sText = ((XSSFSimpleShape)shape).getText();
|
String sText = ((XSSFSimpleShape) shape).getText();
|
||||||
if (sText != null && sText.length() > 0){
|
if (sText != null && sText.length() > 0) {
|
||||||
text.append(sText).append('\n');
|
text.append(sText).append('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
if (container != null) {
|
if (container != null) {
|
||||||
container.close();
|
container.close();
|
||||||
container = null;
|
container = null;
|
||||||
}
|
}
|
||||||
super.close();
|
super.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected class SheetTextExtractor implements SheetContentsHandler {
|
protected class SheetTextExtractor implements SheetContentsHandler {
|
||||||
private final StringBuffer output;
|
private final StringBuilder output = new StringBuilder(64);
|
||||||
private boolean firstCellOfRow;
|
private boolean firstCellOfRow;
|
||||||
private final Map<String, String> headerFooterMap;
|
private final Map<String, String> headerFooterMap;
|
||||||
|
|
||||||
protected SheetTextExtractor() {
|
protected SheetTextExtractor() {
|
||||||
this.output = new StringBuffer();
|
|
||||||
this.firstCellOfRow = true;
|
this.firstCellOfRow = true;
|
||||||
this.headerFooterMap = includeHeadersFooters ? new HashMap<>() : null;
|
this.headerFooterMap = includeHeadersFooters ? new HashMap<>() : null;
|
||||||
}
|
}
|
||||||
@ -344,7 +341,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void cell(String cellRef, String formattedValue, XSSFComment comment) {
|
public void cell(String cellRef, String formattedValue, XSSFComment comment) {
|
||||||
if(firstCellOfRow) {
|
if (firstCellOfRow) {
|
||||||
firstCellOfRow = false;
|
firstCellOfRow = false;
|
||||||
} else {
|
} else {
|
||||||
output.append('\t');
|
output.append('\t');
|
||||||
@ -375,7 +372,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
/**
|
/**
|
||||||
* Append the text for the named header or footer if found.
|
* Append the text for the named header or footer if found.
|
||||||
*/
|
*/
|
||||||
private void appendHeaderFooterText(StringBuffer buffer, String name) {
|
private void appendHeaderFooterText(StringBuilder buffer, String name) {
|
||||||
String text = headerFooterMap.get(name);
|
String text = headerFooterMap.get(name);
|
||||||
if (text != null && text.length() > 0) {
|
if (text != null && text.length() > 0) {
|
||||||
// this is a naive way of handling the left, center, and right
|
// this is a naive way of handling the left, center, and right
|
||||||
@ -387,6 +384,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
buffer.append(text).append('\n');
|
buffer.append(text).append('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove the delimiter if its found at the beginning of the text,
|
* Remove the delimiter if its found at the beginning of the text,
|
||||||
* or replace it with a tab if its in the middle.
|
* or replace it with a tab if its in the middle.
|
||||||
@ -405,10 +403,11 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
/**
|
/**
|
||||||
* Append the text for each header type in the same order
|
* Append the text for each header type in the same order
|
||||||
* they are appended in XSSFExcelExtractor.
|
* they are appended in XSSFExcelExtractor.
|
||||||
|
*
|
||||||
* @see XSSFExcelExtractor#getText()
|
* @see XSSFExcelExtractor#getText()
|
||||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
||||||
*/
|
*/
|
||||||
void appendHeaderText(StringBuffer buffer) {
|
void appendHeaderText(StringBuilder buffer) {
|
||||||
appendHeaderFooterText(buffer, "firstHeader");
|
appendHeaderFooterText(buffer, "firstHeader");
|
||||||
appendHeaderFooterText(buffer, "oddHeader");
|
appendHeaderFooterText(buffer, "oddHeader");
|
||||||
appendHeaderFooterText(buffer, "evenHeader");
|
appendHeaderFooterText(buffer, "evenHeader");
|
||||||
@ -417,10 +416,11 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
/**
|
/**
|
||||||
* Append the text for each footer type in the same order
|
* Append the text for each footer type in the same order
|
||||||
* they are appended in XSSFExcelExtractor.
|
* they are appended in XSSFExcelExtractor.
|
||||||
|
*
|
||||||
* @see XSSFExcelExtractor#getText()
|
* @see XSSFExcelExtractor#getText()
|
||||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
||||||
*/
|
*/
|
||||||
void appendFooterText(StringBuffer buffer) {
|
void appendFooterText(StringBuilder buffer) {
|
||||||
// append the text for each footer type in the same order
|
// append the text for each footer type in the same order
|
||||||
// they are appended in XSSFExcelExtractor
|
// they are appended in XSSFExcelExtractor
|
||||||
appendHeaderFooterText(buffer, "firstFooter");
|
appendHeaderFooterText(buffer, "firstFooter");
|
||||||
@ -431,7 +431,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
/**
|
/**
|
||||||
* Append the cell contents we have collected.
|
* Append the cell contents we have collected.
|
||||||
*/
|
*/
|
||||||
void appendCellText(StringBuffer buffer) {
|
void appendCellText(StringBuilder buffer) {
|
||||||
checkMaxTextSize(buffer, output.toString());
|
checkMaxTextSize(buffer, output.toString());
|
||||||
buffer.append(output);
|
buffer.append(output);
|
||||||
}
|
}
|
||||||
|
@ -292,7 +292,7 @@ public final class XSSFChart extends POIXMLDocumentPart implements Chart, ChartA
|
|||||||
// TODO Do properly
|
// TODO Do properly
|
||||||
CTTitle title = chart.getTitle();
|
CTTitle title = chart.getTitle();
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuilder text = new StringBuilder(64);
|
||||||
XmlObject[] t = title
|
XmlObject[] t = title
|
||||||
.selectPath("declare namespace a='"+XSSFDrawing.NAMESPACE_A+"' .//a:t");
|
.selectPath("declare namespace a='"+XSSFDrawing.NAMESPACE_A+"' .//a:t");
|
||||||
for (int m = 0; m < t.length; m++) {
|
for (int m = 0; m < t.length; m++) {
|
||||||
|
@ -98,7 +98,7 @@ public class HeaderFooterHelper {
|
|||||||
return joinParts(parts[0], parts[1], parts[2]);
|
return joinParts(parts[0], parts[1], parts[2]);
|
||||||
}
|
}
|
||||||
private String joinParts(String l, String c, String r) {
|
private String joinParts(String l, String c, String r) {
|
||||||
StringBuffer ret = new StringBuffer();
|
StringBuilder ret = new StringBuilder(64);
|
||||||
|
|
||||||
// Join as c, l, r
|
// Join as c, l, r
|
||||||
if(c.length() > 0) {
|
if(c.length() > 0) {
|
||||||
|
@ -97,7 +97,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String getText() {
|
public String getText() {
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuilder text = new StringBuilder(64);
|
||||||
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
|
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
|
||||||
|
|
||||||
// Start out with all headers
|
// Start out with all headers
|
||||||
@ -115,7 +115,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void appendBodyElementText(StringBuffer text, IBodyElement e) {
|
public void appendBodyElementText(StringBuilder text, IBodyElement e) {
|
||||||
if (e instanceof XWPFParagraph) {
|
if (e instanceof XWPFParagraph) {
|
||||||
appendParagraphText(text, (XWPFParagraph) e);
|
appendParagraphText(text, (XWPFParagraph) e);
|
||||||
} else if (e instanceof XWPFTable) {
|
} else if (e instanceof XWPFTable) {
|
||||||
@ -125,7 +125,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void appendParagraphText(StringBuffer text, XWPFParagraph paragraph) {
|
public void appendParagraphText(StringBuilder text, XWPFParagraph paragraph) {
|
||||||
CTSectPr ctSectPr = null;
|
CTSectPr ctSectPr = null;
|
||||||
if (paragraph.getCTP().getPPr() != null) {
|
if (paragraph.getCTP().getPPr() != null) {
|
||||||
ctSectPr = paragraph.getCTP().getPPr().getSectPr();
|
ctSectPr = paragraph.getCTP().getPPr().getSectPr();
|
||||||
@ -170,7 +170,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void appendTableText(StringBuffer text, XWPFTable table) {
|
private void appendTableText(StringBuilder text, XWPFTable table) {
|
||||||
//this works recursively to pull embedded tables from tables
|
//this works recursively to pull embedded tables from tables
|
||||||
for (XWPFTableRow row : table.getRows()) {
|
for (XWPFTableRow row : table.getRows()) {
|
||||||
List<ICell> cells = row.getTableICells();
|
List<ICell> cells = row.getTableICells();
|
||||||
@ -189,7 +189,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
|
private void extractFooters(StringBuilder text, XWPFHeaderFooterPolicy hfPolicy) {
|
||||||
if (hfPolicy == null) return;
|
if (hfPolicy == null) return;
|
||||||
|
|
||||||
if (hfPolicy.getFirstPageFooter() != null) {
|
if (hfPolicy.getFirstPageFooter() != null) {
|
||||||
@ -203,7 +203,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
|
private void extractHeaders(StringBuilder text, XWPFHeaderFooterPolicy hfPolicy) {
|
||||||
if (hfPolicy == null) return;
|
if (hfPolicy == null) return;
|
||||||
|
|
||||||
if (hfPolicy.getFirstPageHeader() != null) {
|
if (hfPolicy.getFirstPageHeader() != null) {
|
||||||
|
@ -27,7 +27,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTMarkupRange;
|
|||||||
* @author Yury Batrakov (batrakov at gmail.com)
|
* @author Yury Batrakov (batrakov at gmail.com)
|
||||||
*/
|
*/
|
||||||
public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
|
public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
|
||||||
private StringBuffer commentText;
|
private StringBuilder commentText;
|
||||||
|
|
||||||
public XWPFCommentsDecorator(XWPFParagraphDecorator nextDecorator) {
|
public XWPFCommentsDecorator(XWPFParagraphDecorator nextDecorator) {
|
||||||
this(nextDecorator.paragraph, nextDecorator);
|
this(nextDecorator.paragraph, nextDecorator);
|
||||||
@ -37,11 +37,15 @@ public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
|
|||||||
super(paragraph, nextDecorator);
|
super(paragraph, nextDecorator);
|
||||||
|
|
||||||
XWPFComment comment;
|
XWPFComment comment;
|
||||||
commentText = new StringBuffer();
|
commentText = new StringBuilder(64);
|
||||||
|
|
||||||
for (CTMarkupRange anchor : paragraph.getCTP().getCommentRangeStartArray()) {
|
for (CTMarkupRange anchor : paragraph.getCTP().getCommentRangeStartArray()) {
|
||||||
if ((comment = paragraph.getDocument().getCommentByID(anchor.getId().toString())) != null)
|
if ((comment = paragraph.getDocument().getCommentByID(anchor.getId().toString())) != null) {
|
||||||
commentText.append("\tComment by " + comment.getAuthor() + ": " + comment.getText());
|
commentText.append("\tComment by ")
|
||||||
|
.append(comment.getAuthor())
|
||||||
|
.append(": ")
|
||||||
|
.append(comment.getText());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,10 +27,10 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
|
|||||||
public class XWPFComment {
|
public class XWPFComment {
|
||||||
protected String id;
|
protected String id;
|
||||||
protected String author;
|
protected String author;
|
||||||
protected StringBuffer text;
|
protected StringBuilder text;
|
||||||
|
|
||||||
public XWPFComment(CTComment comment, XWPFDocument document) {
|
public XWPFComment(CTComment comment, XWPFDocument document) {
|
||||||
text = new StringBuffer();
|
text = new StringBuilder(64);
|
||||||
id = comment.getId().toString();
|
id = comment.getId().toString();
|
||||||
author = comment.getAuthor();
|
author = comment.getAuthor();
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ public abstract class XWPFHeaderFooter extends POIXMLDocumentPart implements IBo
|
|||||||
* by flattening out the text of its paragraph(s)
|
* by flattening out the text of its paragraph(s)
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
StringBuffer t = new StringBuffer();
|
StringBuilder t = new StringBuilder(64);
|
||||||
//TODO: simplify this to get ibody elements in order
|
//TODO: simplify this to get ibody elements in order
|
||||||
for (int i = 0; i < paragraphs.size(); i++) {
|
for (int i = 0; i < paragraphs.size(); i++) {
|
||||||
if (!paragraphs.get(i).isEmpty()) {
|
if (!paragraphs.get(i).isEmpty()) {
|
||||||
|
@ -45,7 +45,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para
|
|||||||
protected List<XWPFRun> runs;
|
protected List<XWPFRun> runs;
|
||||||
protected List<IRunElement> iruns;
|
protected List<IRunElement> iruns;
|
||||||
|
|
||||||
private StringBuffer footnoteText = new StringBuffer();
|
private StringBuilder footnoteText = new StringBuilder(64);
|
||||||
|
|
||||||
public XWPFParagraph(CTP prgrph, IBody part) {
|
public XWPFParagraph(CTP prgrph, IBody part) {
|
||||||
this.paragraph = prgrph;
|
this.paragraph = prgrph;
|
||||||
@ -184,7 +184,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para
|
|||||||
* and sdt elements in it.
|
* and sdt elements in it.
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
StringBuffer out = new StringBuffer();
|
StringBuilder out = new StringBuilder(64);
|
||||||
for (IRunElement run : iruns) {
|
for (IRunElement run : iruns) {
|
||||||
if (run instanceof XWPFRun) {
|
if (run instanceof XWPFRun) {
|
||||||
XWPFRun xRun = (XWPFRun) run;
|
XWPFRun xRun = (XWPFRun) run;
|
||||||
@ -398,7 +398,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para
|
|||||||
* paragraph
|
* paragraph
|
||||||
*/
|
*/
|
||||||
public String getParagraphText() {
|
public String getParagraphText() {
|
||||||
StringBuffer out = new StringBuffer();
|
StringBuilder out = new StringBuilder(64);
|
||||||
for (XWPFRun run : runs) {
|
for (XWPFRun run : runs) {
|
||||||
out.append(run);
|
out.append(run);
|
||||||
}
|
}
|
||||||
@ -409,7 +409,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para
|
|||||||
* Returns any text from any suitable pictures in the paragraph
|
* Returns any text from any suitable pictures in the paragraph
|
||||||
*/
|
*/
|
||||||
public String getPictureText() {
|
public String getPictureText() {
|
||||||
StringBuffer out = new StringBuffer();
|
StringBuilder out = new StringBuilder(64);
|
||||||
for (XWPFRun run : runs) {
|
for (XWPFRun run : runs) {
|
||||||
out.append(run.getPictureText());
|
out.append(run.getPictureText());
|
||||||
}
|
}
|
||||||
|
@ -1060,7 +1060,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
|
|||||||
* carriage returns in place of their xml equivalents.
|
* carriage returns in place of their xml equivalents.
|
||||||
*/
|
*/
|
||||||
public String text() {
|
public String text() {
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuilder text = new StringBuilder(64);
|
||||||
|
|
||||||
// Grab the text and tabs of the text run
|
// Grab the text and tabs of the text run
|
||||||
// Do so in a way that preserves the ordering
|
// Do so in a way that preserves the ordering
|
||||||
@ -1084,7 +1084,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
|
|||||||
* @return the phonetic (ruby) string associated with this run or an empty String if none exists
|
* @return the phonetic (ruby) string associated with this run or an empty String if none exists
|
||||||
*/
|
*/
|
||||||
public String getPhonetic() {
|
public String getPhonetic() {
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuilder text = new StringBuilder(64);
|
||||||
|
|
||||||
// Grab the text and tabs of the text run
|
// Grab the text and tabs of the text run
|
||||||
// Do so in a way that preserves the ordering
|
// Do so in a way that preserves the ordering
|
||||||
@ -1110,7 +1110,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
|
|||||||
* @param text buffer to which to append the content
|
* @param text buffer to which to append the content
|
||||||
* @param extractPhonetic extract the phonetic (rt) component or the base component
|
* @param extractPhonetic extract the phonetic (rt) component or the base component
|
||||||
*/
|
*/
|
||||||
private void handleRuby(XmlObject rubyObj, StringBuffer text, boolean extractPhonetic) {
|
private void handleRuby(XmlObject rubyObj, StringBuilder text, boolean extractPhonetic) {
|
||||||
XmlCursor c = rubyObj.newCursor();
|
XmlCursor c = rubyObj.newCursor();
|
||||||
|
|
||||||
//according to the spec, a ruby object
|
//according to the spec, a ruby object
|
||||||
@ -1141,7 +1141,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
|
|||||||
c.dispose();
|
c.dispose();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void _getText(XmlObject o, StringBuffer text) {
|
private void _getText(XmlObject o, StringBuilder text) {
|
||||||
|
|
||||||
if (o instanceof CTText) {
|
if (o instanceof CTText) {
|
||||||
String tagName = o.getDomNode().getNodeName();
|
String tagName = o.getDomNode().getNodeName();
|
||||||
@ -1170,10 +1170,10 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (o instanceof CTPTab) {
|
if (o instanceof CTPTab) {
|
||||||
text.append("\t");
|
text.append('\t');
|
||||||
}
|
}
|
||||||
if (o instanceof CTBr) {
|
if (o instanceof CTBr) {
|
||||||
text.append("\n");
|
text.append('\n');
|
||||||
}
|
}
|
||||||
if (o instanceof CTEmpty) {
|
if (o instanceof CTEmpty) {
|
||||||
// Some inline text elements get returned not as
|
// Some inline text elements get returned not as
|
||||||
@ -1183,13 +1183,13 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
|
|||||||
// rules for that case
|
// rules for that case
|
||||||
String tagName = o.getDomNode().getNodeName();
|
String tagName = o.getDomNode().getNodeName();
|
||||||
if ("w:tab".equals(tagName) || "tab".equals(tagName)) {
|
if ("w:tab".equals(tagName) || "tab".equals(tagName)) {
|
||||||
text.append("\t");
|
text.append('\t');
|
||||||
}
|
}
|
||||||
if ("w:br".equals(tagName) || "br".equals(tagName)) {
|
if ("w:br".equals(tagName) || "br".equals(tagName)) {
|
||||||
text.append("\n");
|
text.append('\n');
|
||||||
}
|
}
|
||||||
if ("w:cr".equals(tagName) || "cr".equals(tagName)) {
|
if ("w:cr".equals(tagName) || "cr".equals(tagName)) {
|
||||||
text.append("\n");
|
text.append('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (o instanceof CTFtnEdnRef) {
|
if (o instanceof CTFtnEdnRef) {
|
||||||
|
@ -71,7 +71,7 @@ public class XWPFTable implements IBodyElement, ISDTContents {
|
|||||||
stBorderTypeMap.put(STBorder.INT_DOT_DASH, XWPFBorderType.DOT_DASH);
|
stBorderTypeMap.put(STBorder.INT_DOT_DASH, XWPFBorderType.DOT_DASH);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected StringBuffer text = new StringBuffer();
|
protected StringBuilder text = new StringBuilder(64);
|
||||||
protected List<XWPFTableRow> tableRows;
|
protected List<XWPFTableRow> tableRows;
|
||||||
|
|
||||||
// Unused: UUF_UNUSED_PUBLIC_OR_PROTECTED_FIELD
|
// Unused: UUF_UNUSED_PUBLIC_OR_PROTECTED_FIELD
|
||||||
|
@ -435,7 +435,7 @@ public class XWPFTableCell implements IBody, ICell {
|
|||||||
*/
|
*/
|
||||||
public String getTextRecursively() {
|
public String getTextRecursively() {
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuilder text = new StringBuilder(64);
|
||||||
for (int i = 0; i < bodyElements.size(); i++) {
|
for (int i = 0; i < bodyElements.size(); i++) {
|
||||||
boolean isLast = (i == bodyElements.size() - 1);
|
boolean isLast = (i == bodyElements.size() - 1);
|
||||||
appendBodyElementText(text, bodyElements.get(i), isLast);
|
appendBodyElementText(text, bodyElements.get(i), isLast);
|
||||||
@ -444,7 +444,7 @@ public class XWPFTableCell implements IBody, ICell {
|
|||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void appendBodyElementText(StringBuffer text, IBodyElement e, boolean isLast) {
|
private void appendBodyElementText(StringBuilder text, IBodyElement e, boolean isLast) {
|
||||||
if (e instanceof XWPFParagraph) {
|
if (e instanceof XWPFParagraph) {
|
||||||
text.append(((XWPFParagraph) e).getText());
|
text.append(((XWPFParagraph) e).getText());
|
||||||
if (!isLast) {
|
if (!isLast) {
|
||||||
|
Loading…
Reference in New Issue
Block a user