Update hssf.extractor.ExcelExtractor to optionally output blank cells too

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@697589 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-09-21 19:17:41 +00:00
parent 0c6a274259
commit fdc39d48a7
4 changed files with 95 additions and 53 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.2-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">Update hssf.extractor.ExcelExtractor to optionally output blank cells too</action>
<action dev="POI-DEVELOPERS" type="add">Include the sheet name in the output of examples.XLS2CSVmra</action>
<action dev="POI-DEVELOPERS" type="fix">45784 - Support long chart titles in SeriesTextRecords</action>
<action dev="POI-DEVELOPERS" type="fix">45777 - Throw an exception if HSSF Footer or Header is attempted to be set too long, rather than having it break during writing out</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.2-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">Update hssf.extractor.ExcelExtractor to optionally output blank cells too</action>
<action dev="POI-DEVELOPERS" type="add">Include the sheet name in the output of examples.XLS2CSVmra</action>
<action dev="POI-DEVELOPERS" type="fix">45784 - Support long chart titles in SeriesTextRecords</action>
<action dev="POI-DEVELOPERS" type="fix">45777 - Throw an exception if HSSF Footer or Header is attempted to be set too long, rather than having it break during writing out</action>

View File

@ -44,6 +44,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
private boolean includeCellComments = false;
private boolean includeBlankCells = false;
public ExcelExtractor(HSSFWorkbook wb) {
super(wb);
@ -73,6 +74,14 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
public void setIncludeCellComments(boolean includeCellComments) {
// Remembered on the extractor; getText() checks this flag before
// appending " Comment by <author>: <text>" after a cell's contents
this.includeCellComments = includeCellComments;
}
/**
* Should blank cells be output? Default is to only
* output cells that are present in the file and are
* non-blank.
*
* @param includeBlankCells true to have {@link #getText()} also
* account for blank or missing cells (each row is then walked
* from column 0); false, the default, to skip them
*/
public void setIncludeBlankCells(boolean includeBlankCells) {
this.includeBlankCells = includeBlankCells;
}
/**
* Retrieves the text contents of the file
@ -80,6 +89,11 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
public String getText() {
StringBuffer text = new StringBuffer();
// We don't care about the difference between
// null (missing) and blank cells
wb.setMissingCellPolicy(HSSFRow.RETURN_BLANK_AS_NULL);
// Process each sheet in turn
for(int i=0;i<wb.getNumberOfSheets();i++) {
HSSFSheet sheet = wb.getSheetAt(i);
if(sheet == null) { continue; }
@ -108,63 +122,68 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
// Check each cell in turn
int firstCell = row.getFirstCellNum();
int lastCell = row.getLastCellNum();
if(includeBlankCells) {
firstCell = 0;
}
for(int k=firstCell;k<lastCell;k++) {
HSSFCell cell = row.getCell(k);
if(cell == null) { continue; }
boolean outputContents = true;
switch(cell.getCellType()) {
case HSSFCell.CELL_TYPE_BLANK:
outputContents = false;
break;
case HSSFCell.CELL_TYPE_STRING:
text.append(cell.getRichStringCellValue().getString());
break;
case HSSFCell.CELL_TYPE_NUMERIC:
// Note - we don't apply any formatting!
text.append(cell.getNumericCellValue());
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
text.append(cell.getBooleanCellValue());
break;
case HSSFCell.CELL_TYPE_ERROR:
text.append(ErrorEval.getText(cell.getErrorCellValue()));
break;
case HSSFCell.CELL_TYPE_FORMULA:
if(formulasNotResults) {
text.append(cell.getCellFormula());
} else {
switch(cell.getCachedFormulaResultType()) {
case HSSFCell.CELL_TYPE_STRING:
HSSFRichTextString str = cell.getRichStringCellValue();
if(str != null && str.length() > 0) {
text.append(str.toString());
}
break;
case HSSFCell.CELL_TYPE_NUMERIC:
text.append(cell.getNumericCellValue());
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
text.append(cell.getBooleanCellValue());
break;
case HSSFCell.CELL_TYPE_ERROR:
text.append(ErrorEval.getText(cell.getErrorCellValue()));
break;
if(cell == null) {
// Only output if requested
outputContents = includeBlankCells;
} else {
switch(cell.getCellType()) {
case HSSFCell.CELL_TYPE_STRING:
text.append(cell.getRichStringCellValue().getString());
break;
case HSSFCell.CELL_TYPE_NUMERIC:
// Note - we don't apply any formatting!
text.append(cell.getNumericCellValue());
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
text.append(cell.getBooleanCellValue());
break;
case HSSFCell.CELL_TYPE_ERROR:
text.append(ErrorEval.getText(cell.getErrorCellValue()));
break;
case HSSFCell.CELL_TYPE_FORMULA:
if(formulasNotResults) {
text.append(cell.getCellFormula());
} else {
switch(cell.getCachedFormulaResultType()) {
case HSSFCell.CELL_TYPE_STRING:
HSSFRichTextString str = cell.getRichStringCellValue();
if(str != null && str.length() > 0) {
text.append(str.toString());
}
break;
case HSSFCell.CELL_TYPE_NUMERIC:
text.append(cell.getNumericCellValue());
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
text.append(cell.getBooleanCellValue());
break;
case HSSFCell.CELL_TYPE_ERROR:
text.append(ErrorEval.getText(cell.getErrorCellValue()));
break;
}
}
}
break;
default:
throw new RuntimeException("Unexpected cell type (" + cell.getCellType() + ")");
}
break;
default:
throw new RuntimeException("Unexpected cell type (" + cell.getCellType() + ")");
}
// Output the comment, if requested and exists
HSSFComment comment = cell.getCellComment();
if(includeCellComments && comment != null) {
// Replace any newlines with spaces, otherwise it
// breaks the output
String commentText = comment.getString().getString().replace('\n', ' ');
text.append(" Comment by "+comment.getAuthor()+": "+commentText);
// Output the comment, if requested and exists
HSSFComment comment = cell.getCellComment();
if(includeCellComments && comment != null) {
// Replace any newlines with spaces, otherwise it
// breaks the output
String commentText = comment.getString().getString().replace('\n', ' ');
text.append(" Comment by "+comment.getAuthor()+": "+commentText);
}
}
// Output a tab if we're not on the last cell

View File

@ -187,6 +187,27 @@ public final class TestExcelExtractor extends TestCase {
);
}
public void testWithBlank() throws Exception {
    // Leading text expected when blank cells are skipped (the default)
    final String expectedDefaultPrefix =
        "Sheet1\n" +
        "&[TAB]\t\n" +
        "Hello\t\n" +
        "11.0\t23.0\t\n";
    // Leading text expected once blank cells are included in the output
    final String expectedPaddedPrefix =
        "Sheet1\n" +
        "&[TAB]\t\n" +
        "Hello\t\t\t\t\t\t\t\t\t\t\t\n" +
        "11.0\t\t\t23.0\t\t\t\t\t\t\t\t\n";

    ExcelExtractor blankAware = createExtractor("MissingBits.xls");

    // Extract with the default settings first, then flip the flag on the
    // same extractor and extract again
    String withoutBlanks = blankAware.getText();
    blankAware.setIncludeBlankCells(true);
    String withBlanks = blankAware.getText();

    assertTrue(withoutBlanks.startsWith(expectedDefaultPrefix));
    assertTrue(withBlanks.startsWith(expectedPaddedPrefix));
}
/**
* Embedded in a non-Excel file