Bugzilla 46544 - adding a command line interface to hssf ExcelExtractor

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@737173 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2009-01-23 20:12:13 +00:00
parent 59e334e8a5
commit d800ec6a84
3 changed files with 190 additions and 31 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.5-beta5" date="2008-??-??"> <release version="3.5-beta5" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action> <action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action>
<action dev="POI-DEVELOPERS" type="fix">46548 - Page Settings Block fixes - continued PLS records and PSB in sheet sub-streams</action> <action dev="POI-DEVELOPERS" type="fix">46548 - Page Settings Block fixes - continued PLS records and PSB in sheet sub-streams</action>
<action dev="POI-DEVELOPERS" type="add">46523 - added implementation for SUMIF function</action> <action dev="POI-DEVELOPERS" type="add">46523 - added implementation for SUMIF function</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.5-beta5" date="2008-??-??"> <release version="3.5-beta5" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action> <action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action>
<action dev="POI-DEVELOPERS" type="fix">46548 - Page Settings Block fixes - continued PLS records and PSB in sheet sub-streams</action> <action dev="POI-DEVELOPERS" type="fix">46548 - Page Settings Block fixes - continued PLS records and PSB in sheet sub-streams</action>
<action dev="POI-DEVELOPERS" type="add">46523 - added implementation for SUMIF function</action> <action dev="POI-DEVELOPERS" type="add">46523 - added implementation for SUMIF function</action>

View File

@ -17,10 +17,13 @@
package org.apache.poi.hssf.extractor; package org.apache.poi.hssf.extractor;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.ss.usermodel.HeaderFooter;
import org.apache.poi.hssf.record.formula.eval.ErrorEval; import org.apache.poi.hssf.record.formula.eval.ErrorEval;
import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFComment; import org.apache.poi.hssf.usermodel.HSSFComment;
@ -30,6 +33,7 @@ import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.HeaderFooter;
/** /**
* A text extractor for Excel files. * A text extractor for Excel files.
@ -41,15 +45,15 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra * @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra
*/ */
public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor { public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
private HSSFWorkbook wb; private HSSFWorkbook _wb;
private boolean includeSheetNames = true; private boolean _includeSheetNames = true;
private boolean formulasNotResults = false; private boolean _shouldEvaluateFormulas = true;
private boolean includeCellComments = false; private boolean _includeCellComments = false;
private boolean includeBlankCells = false; private boolean _includeBlankCells = false;
public ExcelExtractor(HSSFWorkbook wb) { public ExcelExtractor(HSSFWorkbook wb) {
super(wb); super(wb);
this.wb = wb; _wb = wb;
} }
public ExcelExtractor(POIFSFileSystem fs) throws IOException { public ExcelExtractor(POIFSFileSystem fs) throws IOException {
this(fs.getRoot(), fs); this(fs.getRoot(), fs);
@ -58,52 +62,205 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
this(new HSSFWorkbook(dir, fs, true)); this(new HSSFWorkbook(dir, fs, true));
} }
/**
 * Signals that the command line arguments could not be understood.
 * The message is intended to be shown directly to the user.
 */
private static final class CommandParseException extends Exception {
	/**
	 * @param message human readable description of the problem with the arguments
	 */
	public CommandParseException(String message) {
		super(message);
	}
}
/**
 * Immutable holder for the options parsed from the command line.
 * <p>
 * Recognised arguments:
 * <ul>
 * <li><code>-help</code> (case-insensitive) - request the usage message; parsing stops there</li>
 * <li><code>-i &lt;file&gt;</code> - input file (may be given at most once)</li>
 * <li><code>--show-sheet-names</code>, <code>--evaluate-formulas</code>,
 *     <code>--show-comments</code>, <code>--show-blanks</code> - each followed by a boolean value</li>
 * </ul>
 */
private static final class CommandArgs {
	private final boolean _requestHelp;
	private final File _inputFile;
	private final boolean _showSheetNames;
	private final boolean _evaluateFormulas;
	private final boolean _showCellComments;
	private final boolean _showBlankCells;

	/**
	 * Parses the supplied arguments.
	 *
	 * @param args the raw command line arguments
	 * @throws CommandParseException if an argument is unknown, a value is missing or
	 *  invalid, or the input file is repeated, does not exist or is a directory
	 */
	public CommandArgs(String[] args) throws CommandParseException {
		int nArgs = args.length;
		File inputFile = null;
		boolean requestHelp = false;
		boolean showSheetNames = true;
		boolean evaluateFormulas = true;
		boolean showCellComments = false;
		boolean showBlankCells = false;
		for (int i=0; i<nArgs; i++) {
			String arg = args[i];
			if ("-help".equalsIgnoreCase(arg)) {
				// help overrides everything else - no need to look at remaining args
				requestHelp = true;
				break;
			}
			if ("-i".equals(arg)) {
				i++; // step to next arg
				if (i >= nArgs) {
					throw new CommandParseException("Expected filename after '-i'");
				}
				// the filename is the argument that follows '-i', not the flag itself
				arg = args[i];
				if (inputFile != null) {
					throw new CommandParseException("Only one input file can be supplied");
				}
				inputFile = new File(arg);
				if (!inputFile.exists()) {
					throw new CommandParseException("Specified input file '" + arg + "' does not exist");
				}
				if (inputFile.isDirectory()) {
					throw new CommandParseException("Specified input file '" + arg + "' is a directory");
				}
				continue;
			}
			// each boolean flag consumes the following argument as its value
			if ("--show-sheet-names".equals(arg)) {
				showSheetNames = parseBoolArg(args, ++i);
				continue;
			}
			if ("--evaluate-formulas".equals(arg)) {
				evaluateFormulas = parseBoolArg(args, ++i);
				continue;
			}
			if ("--show-comments".equals(arg)) {
				showCellComments = parseBoolArg(args, ++i);
				continue;
			}
			if ("--show-blanks".equals(arg)) {
				showBlankCells = parseBoolArg(args, ++i);
				continue;
			}
			throw new CommandParseException("Invalid argument '" + arg + "'");
		}
		_requestHelp = requestHelp;
		_inputFile = inputFile;
		_showSheetNames = showSheetNames;
		_evaluateFormulas = evaluateFormulas;
		_showCellComments = showCellComments;
		_showBlankCells = showBlankCells;
	}

	/**
	 * Interprets <code>args[i]</code> as a boolean flag value. The comparison is
	 * case-insensitive; 'Y', 'YES', 'ON', 'TRUE' mean <code>true</code> and
	 * 'N', 'NO', 'OFF', 'FALSE' mean <code>false</code>.
	 *
	 * @param args the raw command line arguments
	 * @param i index of the value; <code>args[i-1]</code> is the flag it belongs to
	 * @return the parsed boolean value
	 * @throws CommandParseException if there is no argument at index <code>i</code>
	 *  or it is not one of the recognised values
	 */
	private static boolean parseBoolArg(String[] args, int i) throws CommandParseException {
		if (i >= args.length) {
			throw new CommandParseException("Expected value after '" + args[i-1] + "'");
		}
		String value = args[i].toUpperCase();
		if ("Y".equals(value) || "YES".equals(value) || "ON".equals(value) || "TRUE".equals(value)) {
			return true;
		}
		if ("N".equals(value) || "NO".equals(value) || "OFF".equals(value) || "FALSE".equals(value)) {
			return false;
		}
		throw new CommandParseException("Invalid value '" + args[i] + "' for '" + args[i-1] + "'. Expected 'Y' or 'N'");
	}

	/** @return <code>true</code> if '-help' was supplied */
	public boolean isRequestHelp() {
		return _requestHelp;
	}
	/** @return the file given with '-i', or <code>null</code> if none was supplied */
	public File getInputFile() {
		return _inputFile;
	}
	/** @return value of '--show-sheet-names' (default <code>true</code>) */
	public boolean shouldShowSheetNames() {
		return _showSheetNames;
	}
	/** @return value of '--evaluate-formulas' (default <code>true</code>) */
	public boolean shouldEvaluateFormulas() {
		return _evaluateFormulas;
	}
	/** @return value of '--show-comments' (default <code>false</code>) */
	public boolean shouldShowCellComments() {
		return _showCellComments;
	}
	/** @return value of '--show-blanks' (default <code>false</code>) */
	public boolean shouldShowBlankCells() {
		return _showBlankCells;
	}
}
/**
 * Writes the command line usage summary, including the available flags and
 * their default values, to the supplied stream.
 *
 * @param ps the stream to print the usage message to
 */
private static void printUsageMessage(PrintStream ps) {
	ps.println("Use:");
	ps.println(" " + ExcelExtractor.class.getName() + " [<flag> <value> [<flag> <value> [...]]] [-i <filename.xls>]");
	ps.println(" -i <filename.xls> specifies input file (default is to use stdin)");
	ps.println(" Flags can be set on or off by using the values 'Y' or 'N'.");
	ps.println(" Following are available flags and their default values:");
	ps.println(" --show-sheet-names Y");
	ps.println(" --evaluate-formulas Y");
	ps.println(" --show-comments N");
	// blank cells are not output unless requested, so the advertised default is 'N'
	ps.println(" --show-blanks N");
}
/**
 * Command line extractor.
 * <p>
 * Parses the arguments, reads a workbook from the file given with '-i' (or from
 * stdin when no file is given), and prints the extracted text to stdout.
 * On invalid arguments the error and the usage message are printed to stderr and
 * the JVM exits with status 1. Any failure while reading or extracting prints a
 * stack trace and also exits with status 1.
 *
 * @param args command line arguments
 */
public static void main(String[] args) {
	CommandArgs cmdArgs;
	try {
		cmdArgs = new CommandArgs(args);
	} catch (CommandParseException e) {
		System.err.println(e.getMessage());
		printUsageMessage(System.err);
		System.exit(1);
		return; // suppress compiler error
	}

	if (cmdArgs.isRequestHelp()) {
		printUsageMessage(System.out);
		return;
	}

	try {
		InputStream is;
		if(cmdArgs.getInputFile() == null) {
			is = System.in;
		} else {
			is = new FileInputStream(cmdArgs.getInputFile());
		}
		try {
			HSSFWorkbook wb = new HSSFWorkbook(is);

			ExcelExtractor extractor = new ExcelExtractor(wb);
			extractor.setIncludeSheetNames(cmdArgs.shouldShowSheetNames());
			extractor.setFormulasNotResults(!cmdArgs.shouldEvaluateFormulas());
			extractor.setIncludeCellComments(cmdArgs.shouldShowCellComments());
			extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells());
			System.out.println(extractor.getText());
		} finally {
			// release the file handle we opened; stdin is not ours to close
			if (is != System.in) {
				is.close();
			}
		}
	} catch (Exception e) {
		e.printStackTrace();
		System.exit(1);
	}
}
/** /**
* Should sheet names be included? Default is true * Should sheet names be included? Default is true
*/ */
public void setIncludeSheetNames(boolean includeSheetNames) { public void setIncludeSheetNames(boolean includeSheetNames) {
this.includeSheetNames = includeSheetNames; _includeSheetNames = includeSheetNames;
} }
/** /**
* Should we return the formula itself, and not * Should we return the formula itself, and not
* the result it produces? Default is false * the result it produces? Default is false
*/ */
public void setFormulasNotResults(boolean formulasNotResults) { public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults; _shouldEvaluateFormulas = !formulasNotResults;
} }
/** /**
* Should cell comments be included? Default is false * Should cell comments be included? Default is false
*/ */
public void setIncludeCellComments(boolean includeCellComments) { public void setIncludeCellComments(boolean includeCellComments) {
this.includeCellComments = includeCellComments; _includeCellComments = includeCellComments;
} }
/** /**
* Should blank cells be output? Default is to only * Should blank cells be output? Default is to only
* output cells that are present in the file and are * output cells that are present in the file and are
* non-blank. * non-blank.
*/ */
public void setIncludeBlankCells(boolean includeBlankCells) { public void setIncludeBlankCells(boolean includeBlankCells) {
this.includeBlankCells = includeBlankCells; _includeBlankCells = includeBlankCells;
} }
/** /**
* Retreives the text contents of the file * Retrieves the text contents of the file
*/ */
public String getText() { public String getText() {
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
// We don't care about the differnce between // We don't care about the difference between
// null (missing) and blank cells // null (missing) and blank cells
wb.setMissingCellPolicy(HSSFRow.RETURN_BLANK_AS_NULL); _wb.setMissingCellPolicy(HSSFRow.RETURN_BLANK_AS_NULL);
// Process each sheet in turn // Process each sheet in turn
for(int i=0;i<wb.getNumberOfSheets();i++) { for(int i=0;i<_wb.getNumberOfSheets();i++) {
HSSFSheet sheet = wb.getSheetAt(i); HSSFSheet sheet = _wb.getSheetAt(i);
if(sheet == null) { continue; } if(sheet == null) { continue; }
if(includeSheetNames) { if(_includeSheetNames) {
String name = wb.getSheetName(i); String name = _wb.getSheetName(i);
if(name != null) { if(name != null) {
text.append(name); text.append(name);
text.append("\n"); text.append("\n");
@ -126,7 +283,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
// Check each cell in turn // Check each cell in turn
int firstCell = row.getFirstCellNum(); int firstCell = row.getFirstCellNum();
int lastCell = row.getLastCellNum(); int lastCell = row.getLastCellNum();
if(includeBlankCells) { if(_includeBlankCells) {
firstCell = 0; firstCell = 0;
} }
@ -136,7 +293,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
if(cell == null) { if(cell == null) {
// Only output if requested // Only output if requested
outputContents = includeBlankCells; outputContents = _includeBlankCells;
} else { } else {
switch(cell.getCellType()) { switch(cell.getCellType()) {
case HSSFCell.CELL_TYPE_STRING: case HSSFCell.CELL_TYPE_STRING:
@ -153,7 +310,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
text.append(ErrorEval.getText(cell.getErrorCellValue())); text.append(ErrorEval.getText(cell.getErrorCellValue()));
break; break;
case HSSFCell.CELL_TYPE_FORMULA: case HSSFCell.CELL_TYPE_FORMULA:
if(formulasNotResults) { if(!_shouldEvaluateFormulas) {
text.append(cell.getCellFormula()); text.append(cell.getCellFormula());
} else { } else {
switch(cell.getCachedFormulaResultType()) { switch(cell.getCachedFormulaResultType()) {
@ -181,12 +338,12 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
} }
// Output the comment, if requested and exists // Output the comment, if requested and exists
HSSFComment comment = cell.getCellComment(); HSSFComment comment = cell.getCellComment();
if(includeCellComments && comment != null) { if(_includeCellComments && comment != null) {
// Replace any newlines with spaces, otherwise it // Replace any newlines with spaces, otherwise it
// breaks the output // breaks the output
String commentText = comment.getString().getString().replace('\n', ' '); String commentText = comment.getString().getString().replace('\n', ' ');
text.append(" Comment by "+comment.getAuthor()+": "+commentText); text.append(" Comment by "+comment.getAuthor()+": "+commentText);
} }
} }