diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java index ba1de4a54..32702fd8f 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java @@ -40,64 +40,64 @@ import org.apache.xmlbeans.XmlException; * Helper class to extract text from an OOXML Excel file */ public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor { - public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] { - XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK, - XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK, - XSSFRelation.MACROS_WORKBOOK - }; - - private Locale locale; - private XSSFWorkbook workbook; - private boolean includeSheetNames = true; - private boolean formulasNotResults = false; - private boolean includeCellComments = false; - private boolean includeHeadersFooters = true; + public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] { + XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK, + XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK, + XSSFRelation.MACROS_WORKBOOK + }; + + private Locale locale; + private XSSFWorkbook workbook; + private boolean includeSheetNames = true; + private boolean formulasNotResults = false; + private boolean includeCellComments = false; + private boolean includeHeadersFooters = true; /** * @deprecated Use {@link #XSSFExcelExtractor(org.apache.poi.openxml4j.opc.OPCPackage)} instead. */ - public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { - this(new XSSFWorkbook(path)); - } - public XSSFExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException { - this(new XSSFWorkbook(container)); - } - public XSSFExcelExtractor(XSSFWorkbook workbook) { - super(workbook); - this.workbook = workbook; - } + public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { + this(new XSSFWorkbook(path)); + } + public XSSFExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException { + this(new XSSFWorkbook(container)); + } + public XSSFExcelExtractor(XSSFWorkbook workbook) { + super(workbook); + this.workbook = workbook; + } - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" XSSFExcelExtractor "); - System.exit(1); - } - POIXMLTextExtractor extractor = - new XSSFExcelExtractor(args[0]); - System.out.println(extractor.getText()); - } + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Use:"); + System.err.println(" XSSFExcelExtractor "); + System.exit(1); + } + POIXMLTextExtractor extractor = + new XSSFExcelExtractor(args[0]); + System.out.println(extractor.getText()); + } - /** - * Should sheet names be included? Default is true - */ - public void setIncludeSheetNames(boolean includeSheetNames) { - this.includeSheetNames = includeSheetNames; - } - /** - * Should we return the formula itself, and not - * the result it produces? Default is false - */ - public void setFormulasNotResults(boolean formulasNotResults) { - this.formulasNotResults = formulasNotResults; - } - /** - * Should cell comments be included? Default is true + /** + * Should sheet names be included? Default is true + */ + public void setIncludeSheetNames(boolean includeSheetNames) { + this.includeSheetNames = includeSheetNames; + } + /** + * Should we return the formula itself, and not + * the result it produces? Default is false + */ + public void setFormulasNotResults(boolean formulasNotResults) { + this.formulasNotResults = formulasNotResults; + } + /** + * Should cell comments be included? Default is false */ public void setIncludeCellComments(boolean includeCellComments) { this.includeCellComments = includeCellComments; } - /** + /** * Should headers and footers be included? Default is true */ public void setIncludeHeadersFooters(boolean includeHeadersFooters) { @@ -108,122 +108,122 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apach * on the styles applied to the cells) */ public void setLocale(Locale locale) { - this.locale = locale; + this.locale = locale; } - - /** - * Retreives the text contents of the file - */ - public String getText() { - DataFormatter formatter; - if(locale == null) { - formatter = new DataFormatter(); - } else { - formatter = new DataFormatter(locale); - } - - StringBuffer text = new StringBuffer(); - for(int i=0; i ri = row.cellIterator(); ri.hasNext();) { - Cell cell = ri.next(); + StringBuffer text = new StringBuffer(); + for(int i=0; i ri = row.cellIterator(); ri.hasNext();) { + Cell cell = ri.next(); - if(ri.hasNext()) - text.append("\t"); - } - text.append("\n"); - } + // Is it a formula one? + if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) { + if (formulasNotResults) { + text.append(cell.getCellFormula()); + } else { + if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) { + handleStringCell(text, cell); + } else { + handleNonStringCell(text, cell, formatter); + } + } + } else if(cell.getCellType() == Cell.CELL_TYPE_STRING) { + handleStringCell(text, cell); + } else { + handleNonStringCell(text, cell, formatter); + } - // Finally footer(s), if present - if(includeHeadersFooters) { - text.append( - extractHeaderFooter(sheet.getFirstFooter()) - ); - text.append( - extractHeaderFooter(sheet.getOddFooter()) - ); - text.append( - extractHeaderFooter(sheet.getEvenFooter()) - ); - } - } + // Output the comment, if requested and exists + Comment comment = cell.getCellComment(); + if(includeCellComments && comment != null) { + // Replace any newlines with spaces, otherwise it + // breaks the output + String commentText = comment.getString().getString().replace('\n', ' '); + text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText); + } - return text.toString(); - } - - private void handleStringCell(StringBuffer text, Cell cell) { - text.append(cell.getRichStringCellValue().getString()); - } - private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) { - int type = cell.getCellType(); - if (type == Cell.CELL_TYPE_FORMULA) { - type = cell.getCachedFormulaResultType(); - } + if(ri.hasNext()) + text.append("\t"); + } + text.append("\n"); + } - if (type == Cell.CELL_TYPE_NUMERIC) { - CellStyle cs = cell.getCellStyle(); + // Finally footer(s), if present + if(includeHeadersFooters) { + text.append( + extractHeaderFooter(sheet.getFirstFooter()) + ); + text.append( + extractHeaderFooter(sheet.getOddFooter()) + ); + text.append( + extractHeaderFooter(sheet.getEvenFooter()) + ); + } + } - if (cs.getDataFormatString() != null) { - text.append(formatter.formatRawCellContents( - cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString() - )); - return; - } - } + return text.toString(); + } - // No supported styling applies to this cell - XSSFCell xcell = (XSSFCell)cell; - text.append( xcell.getRawValue() ); - } + private void handleStringCell(StringBuffer text, Cell cell) { + text.append(cell.getRichStringCellValue().getString()); + } + private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) { + int type = cell.getCellType(); + if (type == Cell.CELL_TYPE_FORMULA) { + type = cell.getCachedFormulaResultType(); + } - private String extractHeaderFooter(HeaderFooter hf) { - return ExcelExtractor._extractHeaderFooter(hf); - } + if (type == Cell.CELL_TYPE_NUMERIC) { + CellStyle cs = cell.getCellStyle(); + + if (cs.getDataFormatString() != null) { + text.append(formatter.formatRawCellContents( + cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString() + )); + return; + } + } + + // No supported styling applies to this cell + XSSFCell xcell = (XSSFCell)cell; + text.append( xcell.getRawValue() ); + } + + private String extractHeaderFooter(HeaderFooter hf) { + return ExcelExtractor._extractHeaderFooter(hf); + } }