Fix indenting to be consistent, and correct the setIncludeCellComments javadoc to match the long standing default (#54871)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1496510 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dbd1a59416
commit
ed5588d9af
@ -40,64 +40,64 @@ import org.apache.xmlbeans.XmlException;
|
|||||||
* Helper class to extract text from an OOXML Excel file
|
* Helper class to extract text from an OOXML Excel file
|
||||||
*/
|
*/
|
||||||
public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
|
public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
|
||||||
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
|
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
|
||||||
XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
|
XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
|
||||||
XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
|
XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
|
||||||
XSSFRelation.MACROS_WORKBOOK
|
XSSFRelation.MACROS_WORKBOOK
|
||||||
};
|
};
|
||||||
|
|
||||||
private Locale locale;
|
private Locale locale;
|
||||||
private XSSFWorkbook workbook;
|
private XSSFWorkbook workbook;
|
||||||
private boolean includeSheetNames = true;
|
private boolean includeSheetNames = true;
|
||||||
private boolean formulasNotResults = false;
|
private boolean formulasNotResults = false;
|
||||||
private boolean includeCellComments = false;
|
private boolean includeCellComments = false;
|
||||||
private boolean includeHeadersFooters = true;
|
private boolean includeHeadersFooters = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @deprecated Use {@link #XSSFExcelExtractor(org.apache.poi.openxml4j.opc.OPCPackage)} instead.
|
* @deprecated Use {@link #XSSFExcelExtractor(org.apache.poi.openxml4j.opc.OPCPackage)} instead.
|
||||||
*/
|
*/
|
||||||
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(new XSSFWorkbook(path));
|
this(new XSSFWorkbook(path));
|
||||||
}
|
}
|
||||||
public XSSFExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
|
public XSSFExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(new XSSFWorkbook(container));
|
this(new XSSFWorkbook(container));
|
||||||
}
|
}
|
||||||
public XSSFExcelExtractor(XSSFWorkbook workbook) {
|
public XSSFExcelExtractor(XSSFWorkbook workbook) {
|
||||||
super(workbook);
|
super(workbook);
|
||||||
this.workbook = workbook;
|
this.workbook = workbook;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
if(args.length < 1) {
|
if(args.length < 1) {
|
||||||
System.err.println("Use:");
|
System.err.println("Use:");
|
||||||
System.err.println(" XSSFExcelExtractor <filename.xlsx>");
|
System.err.println(" XSSFExcelExtractor <filename.xlsx>");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
POIXMLTextExtractor extractor =
|
POIXMLTextExtractor extractor =
|
||||||
new XSSFExcelExtractor(args[0]);
|
new XSSFExcelExtractor(args[0]);
|
||||||
System.out.println(extractor.getText());
|
System.out.println(extractor.getText());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should sheet names be included? Default is true
|
* Should sheet names be included? Default is true
|
||||||
*/
|
*/
|
||||||
public void setIncludeSheetNames(boolean includeSheetNames) {
|
public void setIncludeSheetNames(boolean includeSheetNames) {
|
||||||
this.includeSheetNames = includeSheetNames;
|
this.includeSheetNames = includeSheetNames;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Should we return the formula itself, and not
|
* Should we return the formula itself, and not
|
||||||
* the result it produces? Default is false
|
* the result it produces? Default is false
|
||||||
*/
|
*/
|
||||||
public void setFormulasNotResults(boolean formulasNotResults) {
|
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||||
this.formulasNotResults = formulasNotResults;
|
this.formulasNotResults = formulasNotResults;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Should cell comments be included? Default is true
|
* Should cell comments be included? Default is false
|
||||||
*/
|
*/
|
||||||
public void setIncludeCellComments(boolean includeCellComments) {
|
public void setIncludeCellComments(boolean includeCellComments) {
|
||||||
this.includeCellComments = includeCellComments;
|
this.includeCellComments = includeCellComments;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Should headers and footers be included? Default is true
|
* Should headers and footers be included? Default is true
|
||||||
*/
|
*/
|
||||||
public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
|
public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
|
||||||
@ -108,122 +108,122 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apach
|
|||||||
* on the styles applied to the cells)
|
* on the styles applied to the cells)
|
||||||
*/
|
*/
|
||||||
public void setLocale(Locale locale) {
|
public void setLocale(Locale locale) {
|
||||||
this.locale = locale;
|
this.locale = locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the text contents of the file
|
* Retrieves the text contents of the file
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
DataFormatter formatter;
|
DataFormatter formatter;
|
||||||
if(locale == null) {
|
if(locale == null) {
|
||||||
formatter = new DataFormatter();
|
formatter = new DataFormatter();
|
||||||
} else {
|
} else {
|
||||||
formatter = new DataFormatter(locale);
|
formatter = new DataFormatter(locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
for(int i=0; i<workbook.getNumberOfSheets(); i++) {
|
for(int i=0; i<workbook.getNumberOfSheets(); i++) {
|
||||||
XSSFSheet sheet = workbook.getSheetAt(i);
|
XSSFSheet sheet = workbook.getSheetAt(i);
|
||||||
if(includeSheetNames) {
|
if(includeSheetNames) {
|
||||||
text.append(workbook.getSheetName(i)).append("\n");
|
text.append(workbook.getSheetName(i)).append("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Header(s), if present
|
// Header(s), if present
|
||||||
if(includeHeadersFooters) {
|
if(includeHeadersFooters) {
|
||||||
text.append(
|
text.append(
|
||||||
extractHeaderFooter(sheet.getFirstHeader())
|
extractHeaderFooter(sheet.getFirstHeader())
|
||||||
);
|
);
|
||||||
text.append(
|
text.append(
|
||||||
extractHeaderFooter(sheet.getOddHeader())
|
extractHeaderFooter(sheet.getOddHeader())
|
||||||
);
|
);
|
||||||
text.append(
|
text.append(
|
||||||
extractHeaderFooter(sheet.getEvenHeader())
|
extractHeaderFooter(sheet.getEvenHeader())
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rows and cells
|
// Rows and cells
|
||||||
for (Object rawR : sheet) {
|
for (Object rawR : sheet) {
|
||||||
Row row = (Row)rawR;
|
Row row = (Row)rawR;
|
||||||
for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
|
for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
|
||||||
Cell cell = ri.next();
|
Cell cell = ri.next();
|
||||||
|
|
||||||
// Is it a formula one?
|
// Is it a formula one?
|
||||||
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
|
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
|
||||||
if (formulasNotResults) {
|
if (formulasNotResults) {
|
||||||
text.append(cell.getCellFormula());
|
text.append(cell.getCellFormula());
|
||||||
} else {
|
} else {
|
||||||
if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) {
|
if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) {
|
||||||
handleStringCell(text, cell);
|
handleStringCell(text, cell);
|
||||||
} else {
|
} else {
|
||||||
handleNonStringCell(text, cell, formatter);
|
handleNonStringCell(text, cell, formatter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(cell.getCellType() == Cell.CELL_TYPE_STRING) {
|
} else if(cell.getCellType() == Cell.CELL_TYPE_STRING) {
|
||||||
handleStringCell(text, cell);
|
handleStringCell(text, cell);
|
||||||
} else {
|
} else {
|
||||||
handleNonStringCell(text, cell, formatter);
|
handleNonStringCell(text, cell, formatter);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output the comment, if requested and exists
|
// Output the comment, if requested and exists
|
||||||
Comment comment = cell.getCellComment();
|
Comment comment = cell.getCellComment();
|
||||||
if(includeCellComments && comment != null) {
|
if(includeCellComments && comment != null) {
|
||||||
// Replace any newlines with spaces, otherwise it
|
// Replace any newlines with spaces, otherwise it
|
||||||
// breaks the output
|
// breaks the output
|
||||||
String commentText = comment.getString().getString().replace('\n', ' ');
|
String commentText = comment.getString().getString().replace('\n', ' ');
|
||||||
text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText);
|
text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(ri.hasNext())
|
if(ri.hasNext())
|
||||||
text.append("\t");
|
text.append("\t");
|
||||||
}
|
}
|
||||||
text.append("\n");
|
text.append("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finally footer(s), if present
|
// Finally footer(s), if present
|
||||||
if(includeHeadersFooters) {
|
if(includeHeadersFooters) {
|
||||||
text.append(
|
text.append(
|
||||||
extractHeaderFooter(sheet.getFirstFooter())
|
extractHeaderFooter(sheet.getFirstFooter())
|
||||||
);
|
);
|
||||||
text.append(
|
text.append(
|
||||||
extractHeaderFooter(sheet.getOddFooter())
|
extractHeaderFooter(sheet.getOddFooter())
|
||||||
);
|
);
|
||||||
text.append(
|
text.append(
|
||||||
extractHeaderFooter(sheet.getEvenFooter())
|
extractHeaderFooter(sheet.getEvenFooter())
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleStringCell(StringBuffer text, Cell cell) {
|
private void handleStringCell(StringBuffer text, Cell cell) {
|
||||||
text.append(cell.getRichStringCellValue().getString());
|
text.append(cell.getRichStringCellValue().getString());
|
||||||
}
|
}
|
||||||
private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) {
|
private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) {
|
||||||
int type = cell.getCellType();
|
int type = cell.getCellType();
|
||||||
if (type == Cell.CELL_TYPE_FORMULA) {
|
if (type == Cell.CELL_TYPE_FORMULA) {
|
||||||
type = cell.getCachedFormulaResultType();
|
type = cell.getCachedFormulaResultType();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type == Cell.CELL_TYPE_NUMERIC) {
|
if (type == Cell.CELL_TYPE_NUMERIC) {
|
||||||
CellStyle cs = cell.getCellStyle();
|
CellStyle cs = cell.getCellStyle();
|
||||||
|
|
||||||
if (cs.getDataFormatString() != null) {
|
if (cs.getDataFormatString() != null) {
|
||||||
text.append(formatter.formatRawCellContents(
|
text.append(formatter.formatRawCellContents(
|
||||||
cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString()
|
cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString()
|
||||||
));
|
));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// No supported styling applies to this cell
|
// No supported styling applies to this cell
|
||||||
XSSFCell xcell = (XSSFCell)cell;
|
XSSFCell xcell = (XSSFCell)cell;
|
||||||
text.append( xcell.getRawValue() );
|
text.append( xcell.getRawValue() );
|
||||||
}
|
}
|
||||||
|
|
||||||
private String extractHeaderFooter(HeaderFooter hf) {
|
private String extractHeaderFooter(HeaderFooter hf) {
|
||||||
return ExcelExtractor._extractHeaderFooter(hf);
|
return ExcelExtractor._extractHeaderFooter(hf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user