Update hssf.extractor.ExcelExtractor to optionally output blank cells too

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@697589 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-09-21 19:17:41 +00:00
parent 0c6a274259
commit fdc39d48a7
4 changed files with 95 additions and 53 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.2-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">Update hssf.extractor.ExcelExtractor to optionally output blank cells too</action>
<action dev="POI-DEVELOPERS" type="add">Include the sheet name in the output of examples.XLS2CSVmra</action>
<action dev="POI-DEVELOPERS" type="fix">45784 - Support long chart titles in SeriesTextRecords</action>
<action dev="POI-DEVELOPERS" type="fix">45777 - Throw an exception if HSSF Footer or Header is attempted to be set too long, rather than having it break during writing out</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.2-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">Update hssf.extractor.ExcelExtractor to optionally output blank cells too</action>
<action dev="POI-DEVELOPERS" type="add">Include the sheet name in the output of examples.XLS2CSVmra</action>
<action dev="POI-DEVELOPERS" type="fix">45784 - Support long chart titles in SeriesTextRecords</action>
<action dev="POI-DEVELOPERS" type="fix">45777 - Throw an exception if HSSF Footer or Header is attempted to be set too long, rather than having it break during writing out</action>

View File

@ -44,6 +44,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
private boolean includeCellComments = false;
private boolean includeBlankCells = false;
public ExcelExtractor(HSSFWorkbook wb) {
super(wb);
@ -73,6 +74,14 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
public void setIncludeCellComments(boolean includeCellComments) {
this.includeCellComments = includeCellComments;
}
/**
* Should blank cells be output? Default is to only
* output cells that are present in the file and are
* non-blank.
*
* @param includeBlankCells <code>true</code> to output blank cells
*  as well, <code>false</code> (the default) to leave them out
*/
public void setIncludeBlankCells(boolean includeBlankCells) {
this.includeBlankCells = includeBlankCells;
}
/**
* Retrieves the text contents of the file
@ -80,6 +89,11 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
public String getText() {
StringBuffer text = new StringBuffer();
// We don't care about the difference between
// null (missing) and blank cells
wb.setMissingCellPolicy(HSSFRow.RETURN_BLANK_AS_NULL);
// Process each sheet in turn
for(int i=0;i<wb.getNumberOfSheets();i++) {
HSSFSheet sheet = wb.getSheetAt(i);
if(sheet == null) { continue; }
@ -108,15 +122,19 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
// Check each cell in turn
int firstCell = row.getFirstCellNum();
int lastCell = row.getLastCellNum();
if(includeBlankCells) {
firstCell = 0;
}
for(int k=firstCell;k<lastCell;k++) {
HSSFCell cell = row.getCell(k);
if(cell == null) { continue; }
boolean outputContents = true;
if(cell == null) {
// Only output if requested
outputContents = includeBlankCells;
} else {
switch(cell.getCellType()) {
case HSSFCell.CELL_TYPE_BLANK:
outputContents = false;
break;
case HSSFCell.CELL_TYPE_STRING:
text.append(cell.getRichStringCellValue().getString());
break;
@ -166,6 +184,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
String commentText = comment.getString().getString().replace('\n', ' ');
text.append(" Comment by "+comment.getAuthor()+": "+commentText);
}
}
// Output a tab if we're not on the last cell
if(outputContents && k < (lastCell-1)) {

View File

@ -187,6 +187,27 @@ public final class TestExcelExtractor extends TestCase {
);
}
/**
 * Extracts the text of MissingBits.xls twice - once with the
 * extractor's default settings, and once after asking for blank
 * cells to be included - and checks the start of each result.
 */
public void testWithBlank() throws Exception {
    // What the start of the output should look like with default settings
    final String expectedDefaultStart =
        "Sheet1\n" +
        "&[TAB]\t\n" +
        "Hello\t\n" +
        "11.0\t23.0\t\n";
    // What the start of the output should look like with blank cells included
    final String expectedPaddedStart =
        "Sheet1\n" +
        "&[TAB]\t\n" +
        "Hello\t\t\t\t\t\t\t\t\t\t\t\n" +
        "11.0\t\t\t23.0\t\t\t\t\t\t\t\t\n";

    ExcelExtractor blankAwareExtractor = createExtractor("MissingBits.xls");

    // First pass: settings untouched
    String defaultText = blankAwareExtractor.getText();

    // Second pass: same extractor, blank cells switched on
    blankAwareExtractor.setIncludeBlankCells(true);
    String paddedText = blankAwareExtractor.getText();

    boolean defaultMatches = defaultText.startsWith(expectedDefaultStart);
    assertTrue(defaultMatches);

    boolean paddedMatches = paddedText.startsWith(expectedPaddedStart);
    assertTrue(paddedMatches);
}
/**
* Embedded in a non-excel file