use StringBuilder in more places

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1813869 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2017-10-31 10:24:47 +00:00
parent bf5a0c4c9e
commit 34006a542e
18 changed files with 794 additions and 789 deletions

File diff suppressed because it is too large Load Diff

View File

@ -30,211 +30,206 @@ import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProper
/**
* A {@link POITextExtractor} for returning the textual
* content of the OOXML file properties, eg author
* and title.
* content of the OOXML file properties, eg author
* and title.
*/
public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
private final DateFormat dateFormat;
/**
* Creates a new POIXMLPropertiesTextExtractor for the given open document.
*
* @param doc the given open document
*/
public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
super(doc);
/**
* Creates a new POIXMLPropertiesTextExtractor for the given open document.
*
* @param doc the given open document
*/
public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
super(doc);
DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT);
dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs);
dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
}
/**
* Creates a new POIXMLPropertiesTextExtractor, for the
* same file that another TextExtractor is already
* working on.
*
* @param otherExtractor the extractor referencing the given file
*/
public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
this(otherExtractor.getDocument());
}
private void appendIfPresent(StringBuffer text, String thing, boolean value) {
appendIfPresent(text, thing, Boolean.toString(value));
}
private void appendIfPresent(StringBuffer text, String thing, int value) {
appendIfPresent(text, thing, Integer.toString(value));
}
private void appendIfPresent(StringBuffer text, String thing, Date value) {
if(value == null) { return; }
appendIfPresent(text, thing, dateFormat.format(value));
}
private void appendIfPresent(StringBuffer text, String thing, String value) {
if(value == null) { return; }
text.append(thing);
text.append(" = ");
text.append(value);
text.append("\n");
}
dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
}
/**
* Returns the core document properties, eg author
*
* @return the core document properties
*/
@SuppressWarnings("resource")
/**
* Creates a new POIXMLPropertiesTextExtractor, for the
* same file that another TextExtractor is already
* working on.
*
* @param otherExtractor the extractor referencing the given file
*/
public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
this(otherExtractor.getDocument());
}
private void appendIfPresent(StringBuilder text, String thing, boolean value) {
appendIfPresent(text, thing, Boolean.toString(value));
}
private void appendIfPresent(StringBuilder text, String thing, int value) {
appendIfPresent(text, thing, Integer.toString(value));
}
private void appendIfPresent(StringBuilder text, String thing, Date value) {
if (value == null) {
return;
}
appendIfPresent(text, thing, dateFormat.format(value));
}
private void appendIfPresent(StringBuilder text, String thing, String value) {
if (value == null) {
return;
}
text.append(thing);
text.append(" = ");
text.append(value);
text.append("\n");
}
/**
* Returns the core document properties, eg author
*
* @return the core document properties
*/
@SuppressWarnings("resource")
public String getCorePropertiesText() {
POIXMLDocument document = getDocument();
if(document == null) { // event based extractor does not have a document
POIXMLDocument document = getDocument();
if (document == null) { // event based extractor does not have a document
return "";
}
StringBuffer text = new StringBuffer();
StringBuilder text = new StringBuilder(64);
PackagePropertiesPart props =
document.getProperties().getCoreProperties().getUnderlyingProperties();
document.getProperties().getCoreProperties().getUnderlyingProperties();
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue());
appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue());
appendIfPresent(text, "Created", props.getCreatedProperty().getValue());
appendIfPresent(text, "CreatedString", props.getCreatedPropertyString());
appendIfPresent(text, "Creator", props.getCreatorProperty().getValue());
appendIfPresent(text, "Description", props.getDescriptionProperty().getValue());
appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue());
appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue());
appendIfPresent(text, "Language", props.getLanguageProperty().getValue());
appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue());
appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString());
appendIfPresent(text, "Modified", props.getModifiedProperty().getValue());
appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString());
appendIfPresent(text, "Revision", props.getRevisionProperty().getValue());
appendIfPresent(text, "Subject", props.getSubjectProperty().getValue());
appendIfPresent(text, "Title", props.getTitleProperty().getValue());
appendIfPresent(text, "Version", props.getVersionProperty().getValue());
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
appendIfPresent(text, "ContentStatus", props.getContentStatusProperty().getValue());
appendIfPresent(text, "ContentType", props.getContentTypeProperty().getValue());
appendIfPresent(text, "Created", props.getCreatedProperty().getValue());
appendIfPresent(text, "CreatedString", props.getCreatedPropertyString());
appendIfPresent(text, "Creator", props.getCreatorProperty().getValue());
appendIfPresent(text, "Description", props.getDescriptionProperty().getValue());
appendIfPresent(text, "Identifier", props.getIdentifierProperty().getValue());
appendIfPresent(text, "Keywords", props.getKeywordsProperty().getValue());
appendIfPresent(text, "Language", props.getLanguageProperty().getValue());
appendIfPresent(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
appendIfPresent(text, "LastPrinted", props.getLastPrintedProperty().getValue());
appendIfPresent(text, "LastPrintedString", props.getLastPrintedPropertyString());
appendIfPresent(text, "Modified", props.getModifiedProperty().getValue());
appendIfPresent(text, "ModifiedString", props.getModifiedPropertyString());
appendIfPresent(text, "Revision", props.getRevisionProperty().getValue());
appendIfPresent(text, "Subject", props.getSubjectProperty().getValue());
appendIfPresent(text, "Title", props.getTitleProperty().getValue());
appendIfPresent(text, "Version", props.getVersionProperty().getValue());
return text.toString();
}
/**
* Returns the extended document properties, eg application
*
* @return the extended document properties
*/
@SuppressWarnings("resource")
return text.toString();
}
/**
* Returns the extended document properties, eg application
*
* @return the extended document properties
*/
@SuppressWarnings("resource")
public String getExtendedPropertiesText() {
POIXMLDocument document = getDocument();
if(document == null) { // event based extractor does not have a document
if (document == null) { // event based extractor does not have a document
return "";
}
StringBuffer text = new StringBuffer();
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
StringBuilder text = new StringBuilder(64);
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
appendIfPresent(text, "Application", props.getApplication());
appendIfPresent(text, "AppVersion", props.getAppVersion());
appendIfPresent(text, "Characters", props.getCharacters());
appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces());
appendIfPresent(text, "Company", props.getCompany());
appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase());
appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged());
appendIfPresent(text, "Lines", props.getLines());
appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate());
appendIfPresent(text, "Manager", props.getManager());
appendIfPresent(text, "Pages", props.getPages());
appendIfPresent(text, "Paragraphs", props.getParagraphs());
appendIfPresent(text, "PresentationFormat", props.getPresentationFormat());
appendIfPresent(text, "Template", props.getTemplate());
appendIfPresent(text, "TotalTime", props.getTotalTime());
appendIfPresent(text, "Application", props.getApplication());
appendIfPresent(text, "AppVersion", props.getAppVersion());
appendIfPresent(text, "Characters", props.getCharacters());
appendIfPresent(text, "CharactersWithSpaces", props.getCharactersWithSpaces());
appendIfPresent(text, "Company", props.getCompany());
appendIfPresent(text, "HyperlinkBase", props.getHyperlinkBase());
appendIfPresent(text, "HyperlinksChanged", props.getHyperlinksChanged());
appendIfPresent(text, "Lines", props.getLines());
appendIfPresent(text, "LinksUpToDate", props.getLinksUpToDate());
appendIfPresent(text, "Manager", props.getManager());
appendIfPresent(text, "Pages", props.getPages());
appendIfPresent(text, "Paragraphs", props.getParagraphs());
appendIfPresent(text, "PresentationFormat", props.getPresentationFormat());
appendIfPresent(text, "Template", props.getTemplate());
appendIfPresent(text, "TotalTime", props.getTotalTime());
return text.toString();
}
/**
* Returns the custom document properties, if there are any
*
* @return the custom document properties
*/
@SuppressWarnings({ "resource" })
public String getCustomPropertiesText() {
POIXMLDocument document = getDocument();
if(document == null) { // event based extractor does not have a document
return "";
}
return text.toString();
}
StringBuilder text = new StringBuilder();
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
props = document.getProperties().getCustomProperties().getUnderlyingProperties();
/**
* Returns the custom document properties, if there are any
*
* @return the custom document properties
*/
@SuppressWarnings({"resource"})
public String getCustomPropertiesText() {
POIXMLDocument document = getDocument();
if (document == null) { // event based extractor does not have a document
return "";
}
for(CTProperty property : props.getPropertyArray()) {
String val = "(not implemented!)";
StringBuilder text = new StringBuilder();
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
props = document.getProperties().getCustomProperties().getUnderlyingProperties();
if (property.isSetLpwstr()) {
val = property.getLpwstr();
}
else if (property.isSetLpstr()) {
val = property.getLpstr();
}
else if (property.isSetDate()) {
val = property.getDate().toString();
}
else if (property.isSetFiletime()) {
val = property.getFiletime().toString();
}
else if (property.isSetBool()) {
val = Boolean.toString( property.getBool() );
}
for (CTProperty property : props.getPropertyArray()) {
String val = "(not implemented!)";
// Integers
else if (property.isSetI1()) {
val = Integer.toString(property.getI1());
}
else if (property.isSetI2()) {
val = Integer.toString(property.getI2());
}
else if (property.isSetI4()) {
val = Integer.toString(property.getI4());
}
else if (property.isSetI8()) {
val = Long.toString(property.getI8());
}
else if (property.isSetInt()) {
val = Integer.toString( property.getInt() );
}
// Unsigned Integers
else if (property.isSetUi1()) {
val = Integer.toString(property.getUi1());
}
else if (property.isSetUi2()) {
val = Integer.toString(property.getUi2());
}
else if (property.isSetUi4()) {
val = Long.toString(property.getUi4());
}
else if (property.isSetUi8()) {
val = property.getUi8().toString();
}
else if (property.isSetUint()) {
val = Long.toString(property.getUint());
}
// Reals
else if (property.isSetR4()) {
val = Float.toString( property.getR4() );
}
else if (property.isSetR8()) {
val = Double.toString( property.getR8() );
}
else if (property.isSetDecimal()) {
BigDecimal d = property.getDecimal();
if (d == null) {
val = null;
} else {
val = d.toPlainString();
if (property.isSetLpwstr()) {
val = property.getLpwstr();
} else if (property.isSetLpstr()) {
val = property.getLpstr();
} else if (property.isSetDate()) {
val = property.getDate().toString();
} else if (property.isSetFiletime()) {
val = property.getFiletime().toString();
} else if (property.isSetBool()) {
val = Boolean.toString(property.getBool());
}
// Integers
else if (property.isSetI1()) {
val = Integer.toString(property.getI1());
} else if (property.isSetI2()) {
val = Integer.toString(property.getI2());
} else if (property.isSetI4()) {
val = Integer.toString(property.getI4());
} else if (property.isSetI8()) {
val = Long.toString(property.getI8());
} else if (property.isSetInt()) {
val = Integer.toString(property.getInt());
}
// Unsigned Integers
else if (property.isSetUi1()) {
val = Integer.toString(property.getUi1());
} else if (property.isSetUi2()) {
val = Integer.toString(property.getUi2());
} else if (property.isSetUi4()) {
val = Long.toString(property.getUi4());
} else if (property.isSetUi8()) {
val = property.getUi8().toString();
} else if (property.isSetUint()) {
val = Long.toString(property.getUint());
}
// Reals
else if (property.isSetR4()) {
val = Float.toString(property.getR4());
} else if (property.isSetR8()) {
val = Double.toString(property.getR8());
} else if (property.isSetDecimal()) {
BigDecimal d = property.getDecimal();
if (d == null) {
val = null;
} else {
val = d.toPlainString();
}
}
}
/*else if (property.isSetArray()) {
// TODO Fetch the array values and output
@ -254,26 +249,26 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
// TODO Decode, if possible
}*/
text.append(property.getName()).append(" = ").append(val).append("\n");
}
text.append(property.getName()).append(" = ").append(val).append("\n");
}
return text.toString();
}
return text.toString();
}
@Override
@Override
public String getText() {
try {
return
getCorePropertiesText() +
getExtendedPropertiesText() +
getCustomPropertiesText();
} catch(Exception e) {
throw new RuntimeException(e);
}
}
try {
return
getCorePropertiesText() +
getExtendedPropertiesText() +
getCustomPropertiesText();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Override
@Override
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
}
throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
}
}

View File

@ -601,7 +601,7 @@ public final class PackagingURIHelper {
* characters.
*/
public static String decodeURI(URI uri) {
StringBuffer retVal = new StringBuffer();
StringBuilder retVal = new StringBuilder(64);
String uriStr = uri.toASCIIString();
char c;
final int length = uriStr.length();

View File

@ -188,16 +188,16 @@ public final class ContentType {
}
public final String toString(boolean withParameters) {
StringBuffer retVal = new StringBuffer();
StringBuilder retVal = new StringBuilder(64);
retVal.append(this.getType());
retVal.append("/");
retVal.append('/');
retVal.append(this.getSubType());
if (withParameters) {
for (Entry<String, String> me : parameters.entrySet()) {
retVal.append(";");
retVal.append(';');
retVal.append(me.getKey());
retVal.append("=");
retVal.append('=');
retVal.append(me.getValue());
}
}

View File

@ -223,7 +223,7 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
//// ContentHandler methods ////
private StringBuffer characters;
private StringBuilder characters;
private boolean tIsOpen;
private boolean inRPh;
@ -241,7 +241,7 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
this.strings = new ArrayList<>(this.uniqueCount);
this.phoneticStrings = new HashMap<>();
characters = new StringBuffer();
characters = new StringBuilder(64);
} else if ("si".equals(localName)) {
characters.setLength(0);
} else if ("t".equals(localName)) {

View File

@ -102,9 +102,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
private boolean formulasNotResults;
// Gathers characters as they are seen.
private StringBuffer value = new StringBuffer();
private StringBuffer formula = new StringBuffer();
private StringBuffer headerFooter = new StringBuffer();
private StringBuilder value = new StringBuilder(64);
private StringBuilder formula = new StringBuilder(64);
private StringBuilder headerFooter = new StringBuilder(64);
private Queue<CellAddress> commentCellRefs;

View File

@ -122,7 +122,7 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
StringBuffer text = new StringBuffer();
StringBuilder text = new StringBuilder(64);
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
XSSFBHyperlinksTable hyperlinksTable = null;
while (iter.hasNext()) {

View File

@ -52,10 +52,10 @@ import org.xml.sax.XMLReader;
/**
* Implementation of a text extractor from OOXML Excel
* files that uses SAX event based parsing.
* files that uses SAX event based parsing.
*/
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
implements org.apache.poi.ss.extractor.ExcelExtractor {
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
implements org.apache.poi.ss.extractor.ExcelExtractor {
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFEventBasedExcelExtractor.class);
@ -73,6 +73,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(OPCPackage.open(path));
}
public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
super(null);
this.container = container;
@ -81,13 +82,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
}
public static void main(String[] args) throws Exception {
if(args.length < 1) {
if (args.length < 1) {
System.err.println("Use:");
System.err.println(" XSSFEventBasedExcelExtractor <filename.xlsx>");
System.exit(1);
}
POIXMLTextExtractor extractor =
new XSSFEventBasedExcelExtractor(args[0]);
new XSSFEventBasedExcelExtractor(args[0]);
System.out.println(extractor.getText());
extractor.close();
}
@ -101,9 +102,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
*
* @return whether to include sheet names
*
* @since 3.16-beta3
*/
public boolean getIncludeSheetNames() {
@ -112,16 +111,14 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
* Should we return the formula itself, and not
* the result it produces? Default is false
*
* @param formulasNotResults {@code true} to return formulas rather than their results
*/
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
/**
*
* @return whether to include formulas but not results
*
* @since 3.16-beta3
*/
public boolean getFormulasNotResults() {
@ -136,14 +133,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
}
/**
 * Reports the current value of the headers-and-footers inclusion flag.
 *
 * @return whether or not to include headers and footers
 * @since 3.16-beta3
 */
public boolean getIncludeHeadersFooters() {
    return this.includeHeadersFooters;
}
/**
* Should text from textboxes be included? Default is true
*/
@ -152,14 +148,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
}
/**
 * Reports the current value of the textbox inclusion flag.
 *
 * @return whether or not to extract textboxes
 * @since 3.16-beta3
 */
public boolean getIncludeTextBoxes() {
    return this.includeTextBoxes;
}
/**
* Should cell comments be included? Default is false
*/
@ -169,32 +164,34 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
 * Reports the current value of the cell-comment inclusion flag.
 *
 * @return whether cell comments should be included
 * @since 3.16-beta3
 */
public boolean getIncludeCellComments() {
    return this.includeCellComments;
}
/**
* Sets whether text from &lt;rPh&gt; text elements in the SharedStringsTable
* is concatenated. The flag is handed to the ReadOnlySharedStringsTable
* that {@code getText()} constructs. Default is true.
*
* @param concatenatePhoneticRuns true if runs should be concatenated, false otherwise
*/
public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
}
/**
* Sets the locale handed to the DataFormatter that {@code processSheet} creates.
* When the stored locale is {@code null}, {@code processSheet} uses the
* no-argument DataFormatter constructor instead.
*
* @param locale the locale to store; may be {@code null}
*/
public void setLocale(Locale locale) {
this.locale = locale;
}
/**
 * Returns the locale currently stored on this extractor.
 *
 * @return locale
 * @since 3.16-beta3
 */
public Locale getLocale() {
    return this.locale;
}
/**
* Returns the opened OPCPackage container.
*/
@ -210,6 +207,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
public CoreProperties getCoreProperties() {
return properties.getCoreProperties();
}
/**
* Returns the extended document properties
*/
@ -217,6 +215,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
public ExtendedProperties getExtendedProperties() {
return properties.getExtendedProperties();
}
/**
* Returns the custom document properties
*/
@ -226,7 +225,6 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
}
/**
* Processes the given sheet
*/
@ -238,96 +236,95 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
InputStream sheetInputStream)
throws IOException, SAXException {
DataFormatter formatter;
if(locale == null) {
formatter = new DataFormatter();
} else {
formatter = new DataFormatter(locale);
}
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
DataFormatter formatter;
if (locale == null) {
formatter = new DataFormatter();
} else {
formatter = new DataFormatter(locale);
}
/**
* Processes the file and returns the text
*/
public String getText() {
try {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
XSSFReader xssfReader = new XSSFReader(container);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
StringBuffer text = new StringBuffer();
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
while (iter.hasNext()) {
InputStream stream = iter.next();
if(includeSheetNames) {
text.append(iter.getSheetName());
text.append('\n');
}
CommentsTable comments = includeCellComments ? iter.getSheetComments() : null;
processSheet(sheetExtractor, styles, comments, strings, stream);
if (includeHeadersFooters) {
sheetExtractor.appendHeaderText(text);
}
sheetExtractor.appendCellText(text);
if (includeTextBoxes){
processShapes(iter.getShapes(), text);
}
if (includeHeadersFooters) {
sheetExtractor.appendFooterText(text);
}
sheetExtractor.reset();
stream.close();
}
return text.toString();
} catch(IOException | OpenXML4JException | SAXException e) {
LOGGER.log(POILogger.WARN, e);
return null;
}
}
void processShapes(List<XSSFShape> shapes, StringBuffer text) {
if (shapes == null){
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch (ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
/**
* Processes the file and returns the text
*/
public String getText() {
try {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
XSSFReader xssfReader = new XSSFReader(container);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
StringBuilder text = new StringBuilder(64);
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
while (iter.hasNext()) {
InputStream stream = iter.next();
if (includeSheetNames) {
text.append(iter.getSheetName());
text.append('\n');
}
CommentsTable comments = includeCellComments ? iter.getSheetComments() : null;
processSheet(sheetExtractor, styles, comments, strings, stream);
if (includeHeadersFooters) {
sheetExtractor.appendHeaderText(text);
}
sheetExtractor.appendCellText(text);
if (includeTextBoxes) {
processShapes(iter.getShapes(), text);
}
if (includeHeadersFooters) {
sheetExtractor.appendFooterText(text);
}
sheetExtractor.reset();
stream.close();
}
return text.toString();
} catch (IOException | OpenXML4JException | SAXException e) {
LOGGER.log(POILogger.WARN, e);
return null;
}
}
void processShapes(List<XSSFShape> shapes, StringBuilder text) {
if (shapes == null) {
return;
}
for (XSSFShape shape : shapes){
if (shape instanceof XSSFSimpleShape){
String sText = ((XSSFSimpleShape)shape).getText();
if (sText != null && sText.length() > 0){
for (XSSFShape shape : shapes) {
if (shape instanceof XSSFSimpleShape) {
String sText = ((XSSFSimpleShape) shape).getText();
if (sText != null && sText.length() > 0) {
text.append(sText).append('\n');
}
}
}
}
@Override
public void close() throws IOException {
if (container != null) {
container.close();
container = null;
}
super.close();
}
public void close() throws IOException {
if (container != null) {
container.close();
container = null;
}
super.close();
}
protected class SheetTextExtractor implements SheetContentsHandler {
private final StringBuffer output;
private final StringBuilder output = new StringBuilder(64);
private boolean firstCellOfRow;
private final Map<String, String> headerFooterMap;
protected SheetTextExtractor() {
this.output = new StringBuffer();
this.firstCellOfRow = true;
this.headerFooterMap = includeHeadersFooters ? new HashMap<>() : null;
}
@ -344,7 +341,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
@Override
public void cell(String cellRef, String formattedValue, XSSFComment comment) {
if(firstCellOfRow) {
if (firstCellOfRow) {
firstCellOfRow = false;
} else {
output.append('\t');
@ -375,7 +372,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
* Append the text for the named header or footer if found.
*/
private void appendHeaderFooterText(StringBuffer buffer, String name) {
private void appendHeaderFooterText(StringBuilder buffer, String name) {
String text = headerFooterMap.get(name);
if (text != null && text.length() > 0) {
// this is a naive way of handling the left, center, and right
@ -387,6 +384,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
buffer.append(text).append('\n');
}
}
/**
* Remove the delimiter if it is found at the beginning of the text,
* or replace it with a tab if it is in the middle.
@ -405,10 +403,11 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
* Append the text for each header type in the same order
* they are appended in XSSFExcelExtractor.
*
* @see XSSFExcelExtractor#getText()
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
*/
void appendHeaderText(StringBuffer buffer) {
void appendHeaderText(StringBuilder buffer) {
appendHeaderFooterText(buffer, "firstHeader");
appendHeaderFooterText(buffer, "oddHeader");
appendHeaderFooterText(buffer, "evenHeader");
@ -417,10 +416,11 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
* Append the text for each footer type in the same order
* they are appended in XSSFExcelExtractor.
*
* @see XSSFExcelExtractor#getText()
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
*/
void appendFooterText(StringBuffer buffer) {
void appendFooterText(StringBuilder buffer) {
// append the text for each footer type in the same order
// they are appended in XSSFExcelExtractor
appendHeaderFooterText(buffer, "firstFooter");
@ -431,7 +431,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
* Append the cell contents we have collected.
*/
void appendCellText(StringBuffer buffer) {
void appendCellText(StringBuilder buffer) {
checkMaxTextSize(buffer, output.toString());
buffer.append(output);
}

View File

@ -292,7 +292,7 @@ public final class XSSFChart extends POIXMLDocumentPart implements Chart, ChartA
// TODO Do properly
CTTitle title = chart.getTitle();
StringBuffer text = new StringBuffer();
StringBuilder text = new StringBuilder(64);
XmlObject[] t = title
.selectPath("declare namespace a='"+XSSFDrawing.NAMESPACE_A+"' .//a:t");
for (int m = 0; m < t.length; m++) {

View File

@ -98,7 +98,7 @@ public class HeaderFooterHelper {
return joinParts(parts[0], parts[1], parts[2]);
}
private String joinParts(String l, String c, String r) {
StringBuffer ret = new StringBuffer();
StringBuilder ret = new StringBuilder(64);
// Join as c, l, r
if(c.length() > 0) {

View File

@ -97,7 +97,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
}
public String getText() {
StringBuffer text = new StringBuffer();
StringBuilder text = new StringBuilder(64);
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
// Start out with all headers
@ -115,7 +115,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
return text.toString();
}
public void appendBodyElementText(StringBuffer text, IBodyElement e) {
public void appendBodyElementText(StringBuilder text, IBodyElement e) {
if (e instanceof XWPFParagraph) {
appendParagraphText(text, (XWPFParagraph) e);
} else if (e instanceof XWPFTable) {
@ -125,7 +125,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
}
}
public void appendParagraphText(StringBuffer text, XWPFParagraph paragraph) {
public void appendParagraphText(StringBuilder text, XWPFParagraph paragraph) {
CTSectPr ctSectPr = null;
if (paragraph.getCTP().getPPr() != null) {
ctSectPr = paragraph.getCTP().getPPr().getSectPr();
@ -170,7 +170,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
}
}
private void appendTableText(StringBuffer text, XWPFTable table) {
private void appendTableText(StringBuilder text, XWPFTable table) {
//this works recursively to pull embedded tables from tables
for (XWPFTableRow row : table.getRows()) {
List<ICell> cells = row.getTableICells();
@ -189,7 +189,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
}
}
private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
private void extractFooters(StringBuilder text, XWPFHeaderFooterPolicy hfPolicy) {
if (hfPolicy == null) return;
if (hfPolicy.getFirstPageFooter() != null) {
@ -203,7 +203,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
}
}
private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
private void extractHeaders(StringBuilder text, XWPFHeaderFooterPolicy hfPolicy) {
if (hfPolicy == null) return;
if (hfPolicy.getFirstPageHeader() != null) {

View File

@ -27,7 +27,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTMarkupRange;
* @author Yury Batrakov (batrakov at gmail.com)
*/
public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
private StringBuffer commentText;
private StringBuilder commentText;
public XWPFCommentsDecorator(XWPFParagraphDecorator nextDecorator) {
this(nextDecorator.paragraph, nextDecorator);
@ -37,11 +37,15 @@ public class XWPFCommentsDecorator extends XWPFParagraphDecorator {
super(paragraph, nextDecorator);
XWPFComment comment;
commentText = new StringBuffer();
commentText = new StringBuilder(64);
for (CTMarkupRange anchor : paragraph.getCTP().getCommentRangeStartArray()) {
if ((comment = paragraph.getDocument().getCommentByID(anchor.getId().toString())) != null)
commentText.append("\tComment by " + comment.getAuthor() + ": " + comment.getText());
if ((comment = paragraph.getDocument().getCommentByID(anchor.getId().toString())) != null) {
commentText.append("\tComment by ")
.append(comment.getAuthor())
.append(": ")
.append(comment.getText());
}
}
}

View File

@ -27,10 +27,10 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
public class XWPFComment {
protected String id;
protected String author;
protected StringBuffer text;
protected StringBuilder text;
public XWPFComment(CTComment comment, XWPFDocument document) {
text = new StringBuffer();
text = new StringBuilder(64);
id = comment.getId().toString();
author = comment.getAuthor();

View File

@ -127,7 +127,7 @@ public abstract class XWPFHeaderFooter extends POIXMLDocumentPart implements IBo
* by flattening out the text of its paragraph(s)
*/
public String getText() {
StringBuffer t = new StringBuffer();
StringBuilder t = new StringBuilder(64);
//TODO: simplify this to get ibody elements in order
for (int i = 0; i < paragraphs.size(); i++) {
if (!paragraphs.get(i).isEmpty()) {

View File

@ -45,7 +45,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para
protected List<XWPFRun> runs;
protected List<IRunElement> iruns;
private StringBuffer footnoteText = new StringBuffer();
private StringBuilder footnoteText = new StringBuilder(64);
public XWPFParagraph(CTP prgrph, IBody part) {
this.paragraph = prgrph;
@ -184,7 +184,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para
* and sdt elements in it.
*/
public String getText() {
StringBuffer out = new StringBuffer();
StringBuilder out = new StringBuilder(64);
for (IRunElement run : iruns) {
if (run instanceof XWPFRun) {
XWPFRun xRun = (XWPFRun) run;
@ -398,7 +398,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para
* paragraph
*/
public String getParagraphText() {
StringBuffer out = new StringBuffer();
StringBuilder out = new StringBuilder(64);
for (XWPFRun run : runs) {
out.append(run);
}
@ -409,7 +409,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para
* Returns any text from any suitable pictures in the paragraph
*/
public String getPictureText() {
StringBuffer out = new StringBuffer();
StringBuilder out = new StringBuilder(64);
for (XWPFRun run : runs) {
out.append(run.getPictureText());
}

View File

@ -1060,7 +1060,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
* carriage returns in place of their xml equivalents.
*/
public String text() {
StringBuffer text = new StringBuffer();
StringBuilder text = new StringBuilder(64);
// Grab the text and tabs of the text run
// Do so in a way that preserves the ordering
@ -1084,7 +1084,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
* @return the phonetic (ruby) string associated with this run or an empty String if none exists
*/
public String getPhonetic() {
StringBuffer text = new StringBuffer();
StringBuilder text = new StringBuilder(64);
// Grab the text and tabs of the text run
// Do so in a way that preserves the ordering
@ -1110,7 +1110,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
* @param text buffer to which to append the content
* @param extractPhonetic extract the phonetic (rt) component or the base component
*/
private void handleRuby(XmlObject rubyObj, StringBuffer text, boolean extractPhonetic) {
private void handleRuby(XmlObject rubyObj, StringBuilder text, boolean extractPhonetic) {
XmlCursor c = rubyObj.newCursor();
//according to the spec, a ruby object
@ -1141,7 +1141,7 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
c.dispose();
}
private void _getText(XmlObject o, StringBuffer text) {
private void _getText(XmlObject o, StringBuilder text) {
if (o instanceof CTText) {
String tagName = o.getDomNode().getNodeName();
@ -1170,10 +1170,10 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
}
if (o instanceof CTPTab) {
text.append("\t");
text.append('\t');
}
if (o instanceof CTBr) {
text.append("\n");
text.append('\n');
}
if (o instanceof CTEmpty) {
// Some inline text elements get returned not as
@ -1183,13 +1183,13 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
// rules for that case
String tagName = o.getDomNode().getNodeName();
if ("w:tab".equals(tagName) || "tab".equals(tagName)) {
text.append("\t");
text.append('\t');
}
if ("w:br".equals(tagName) || "br".equals(tagName)) {
text.append("\n");
text.append('\n');
}
if ("w:cr".equals(tagName) || "cr".equals(tagName)) {
text.append("\n");
text.append('\n');
}
}
if (o instanceof CTFtnEdnRef) {

View File

@ -71,7 +71,7 @@ public class XWPFTable implements IBodyElement, ISDTContents {
stBorderTypeMap.put(STBorder.INT_DOT_DASH, XWPFBorderType.DOT_DASH);
}
protected StringBuffer text = new StringBuffer();
protected StringBuilder text = new StringBuilder(64);
protected List<XWPFTableRow> tableRows;
// Unused: UUF_UNUSED_PUBLIC_OR_PROTECTED_FIELD

View File

@ -435,7 +435,7 @@ public class XWPFTableCell implements IBody, ICell {
*/
public String getTextRecursively() {
StringBuffer text = new StringBuffer();
StringBuilder text = new StringBuilder(64);
for (int i = 0; i < bodyElements.size(); i++) {
boolean isLast = (i == bodyElements.size() - 1);
appendBodyElementText(text, bodyElements.get(i), isLast);
@ -444,7 +444,7 @@ public class XWPFTableCell implements IBody, ICell {
return text.toString();
}
private void appendBodyElementText(StringBuffer text, IBodyElement e, boolean isLast) {
private void appendBodyElementText(StringBuilder text, IBodyElement e, boolean isLast) {
if (e instanceof XWPFParagraph) {
text.append(((XWPFParagraph) e).getText());
if (!isLast) {