Another fix inspired by bug #48494 - have ExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@950657 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
393e672176
commit
53f8d4561f
@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.7-SNAPSHOT" date="2010-??-??">
|
<release version="3.7-SNAPSHOT" date="2010-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">48494 - have ExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">48494 - have EventBasedExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them</action>
|
<action dev="POI-DEVELOPERS" type="fix">48494 - have EventBasedExcelExtractor make use of HSSFDataFormatter, so that numbers and dates come out closer to how Excel would render them</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">49096 - add clone support to Chart begin and end records, to allow cloning of more Chart containing sheets</action>
|
<action dev="POI-DEVELOPERS" type="fix">49096 - add clone support to Chart begin and end records, to allow cloning of more Chart containing sheets</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal)</action>
|
<action dev="POI-DEVELOPERS" type="add">List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal)</action>
|
||||||
|
@ -26,7 +26,9 @@ import java.io.PrintStream;
|
|||||||
import org.apache.poi.POIOLE2TextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.hssf.record.formula.eval.ErrorEval;
|
import org.apache.poi.hssf.record.formula.eval.ErrorEval;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFCell;
|
import org.apache.poi.hssf.usermodel.HSSFCell;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFComment;
|
import org.apache.poi.hssf.usermodel.HSSFComment;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFRow;
|
import org.apache.poi.hssf.usermodel.HSSFRow;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
||||||
@ -51,6 +53,7 @@ import org.apache.poi.ss.usermodel.HeaderFooter;
|
|||||||
*/
|
*/
|
||||||
public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
|
public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
|
||||||
private HSSFWorkbook _wb;
|
private HSSFWorkbook _wb;
|
||||||
|
private HSSFDataFormatter _formatter;
|
||||||
private boolean _includeSheetNames = true;
|
private boolean _includeSheetNames = true;
|
||||||
private boolean _shouldEvaluateFormulas = true;
|
private boolean _shouldEvaluateFormulas = true;
|
||||||
private boolean _includeCellComments = false;
|
private boolean _includeCellComments = false;
|
||||||
@ -60,6 +63,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
|
|||||||
public ExcelExtractor(HSSFWorkbook wb) {
|
public ExcelExtractor(HSSFWorkbook wb) {
|
||||||
super(wb);
|
super(wb);
|
||||||
_wb = wb;
|
_wb = wb;
|
||||||
|
_formatter = new HSSFDataFormatter();
|
||||||
}
|
}
|
||||||
public ExcelExtractor(POIFSFileSystem fs) throws IOException {
|
public ExcelExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
this(fs.getRoot(), fs);
|
this(fs.getRoot(), fs);
|
||||||
@ -323,8 +327,9 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
|
|||||||
text.append(cell.getRichStringCellValue().getString());
|
text.append(cell.getRichStringCellValue().getString());
|
||||||
break;
|
break;
|
||||||
case HSSFCell.CELL_TYPE_NUMERIC:
|
case HSSFCell.CELL_TYPE_NUMERIC:
|
||||||
// Note - we don't apply any formatting!
|
text.append(
|
||||||
text.append(cell.getNumericCellValue());
|
_formatter.formatCellValue(cell)
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
case HSSFCell.CELL_TYPE_BOOLEAN:
|
case HSSFCell.CELL_TYPE_BOOLEAN:
|
||||||
text.append(cell.getBooleanCellValue());
|
text.append(cell.getBooleanCellValue());
|
||||||
@ -344,7 +349,18 @@ public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.p
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case HSSFCell.CELL_TYPE_NUMERIC:
|
case HSSFCell.CELL_TYPE_NUMERIC:
|
||||||
text.append(cell.getNumericCellValue());
|
HSSFCellStyle style = cell.getCellStyle();
|
||||||
|
if(style == null) {
|
||||||
|
text.append( cell.getNumericCellValue() );
|
||||||
|
} else {
|
||||||
|
text.append(
|
||||||
|
_formatter.formatRawCellContents(
|
||||||
|
cell.getNumericCellValue(),
|
||||||
|
style.getDataFormat(),
|
||||||
|
style.getDataFormatString()
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case HSSFCell.CELL_TYPE_BOOLEAN:
|
case HSSFCell.CELL_TYPE_BOOLEAN:
|
||||||
text.append(cell.getBooleanCellValue());
|
text.append(cell.getBooleanCellValue());
|
||||||
|
@ -63,11 +63,11 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"Sheet1\n" +
|
"Sheet1\n" +
|
||||||
"1000.0\t1.0\t5.0\n" +
|
"1000\t1\t5\n" +
|
||||||
"2000.0\t2.0\n" +
|
"2000\t2\n" +
|
||||||
"3000.0\t3.0\n" +
|
"3000\t3\n" +
|
||||||
"4000.0\t4.0\n" +
|
"4000\t4\n" +
|
||||||
"5000.0\t5.0\n" +
|
"5000\t5\n" +
|
||||||
"Sheet2\nSheet3\n",
|
"Sheet2\nSheet3\n",
|
||||||
extractor.getText()
|
extractor.getText()
|
||||||
);
|
);
|
||||||
@ -76,11 +76,11 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"Sheet1\n" +
|
"Sheet1\n" +
|
||||||
"1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
|
"1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
|
||||||
"2000.0\t2.0\n" +
|
"2000\t2\n" +
|
||||||
"3000.0\t3.0\n" +
|
"3000\t3\n" +
|
||||||
"4000.0\t4.0\n" +
|
"4000\t4\n" +
|
||||||
"5000.0\t5.0\n" +
|
"5000\t5\n" +
|
||||||
"Sheet2\nSheet3\n",
|
"Sheet2\nSheet3\n",
|
||||||
extractor.getText()
|
extractor.getText()
|
||||||
);
|
);
|
||||||
@ -173,18 +173,18 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
|
|
||||||
// Check without comments
|
// Check without comments
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"1.0\tone\n" +
|
"1\tone\n" +
|
||||||
"2.0\ttwo\n" +
|
"2\ttwo\n" +
|
||||||
"3.0\tthree\n",
|
"3\tthree\n",
|
||||||
extractor.getText()
|
extractor.getText()
|
||||||
);
|
);
|
||||||
|
|
||||||
// Now with
|
// Now with
|
||||||
extractor.setIncludeCellComments(true);
|
extractor.setIncludeCellComments(true);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
|
"1\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
|
||||||
"2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
|
"2\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
|
||||||
"3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
|
"3\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
|
||||||
extractor.getText()
|
extractor.getText()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -199,20 +199,65 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
"Sheet1\n" +
|
"Sheet1\n" +
|
||||||
"&[TAB]\t\n" +
|
"&[TAB]\t\n" +
|
||||||
"Hello\n" +
|
"Hello\n" +
|
||||||
"11.0\t23.0\n"
|
"11\t23\n"
|
||||||
));
|
));
|
||||||
|
|
||||||
assertTrue(padded.startsWith(
|
assertTrue(padded.startsWith(
|
||||||
"Sheet1\n" +
|
"Sheet1\n" +
|
||||||
"&[TAB]\t\n" +
|
"&[TAB]\t\n" +
|
||||||
"Hello\n" +
|
"Hello\n" +
|
||||||
"11.0\t\t\t23.0\n"
|
"11\t\t\t23\n"
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testFormatting() throws Exception {
|
||||||
|
ExcelExtractor extractor = createExtractor("Formatting.xls");
|
||||||
|
extractor.setIncludeBlankCells(false);
|
||||||
|
extractor.setIncludeSheetNames(false);
|
||||||
|
String text = extractor.getText();
|
||||||
|
|
||||||
|
// Note - not all the formats in the file
|
||||||
|
// actually quite match what they claim to
|
||||||
|
// be, as some are auto-local builtins...
|
||||||
|
|
||||||
|
assertTrue(text.startsWith(
|
||||||
|
"Dates, all 24th November 2006\n"
|
||||||
|
));
|
||||||
|
assertTrue(
|
||||||
|
text.indexOf(
|
||||||
|
"yyyy/mm/dd\t2006/11/24\n"
|
||||||
|
) > -1
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
text.indexOf(
|
||||||
|
"yyyy-mm-dd\t2006-11-24\n"
|
||||||
|
) > -1
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
text.indexOf(
|
||||||
|
"dd-mm-yy\t24-11-06\n"
|
||||||
|
) > -1
|
||||||
|
);
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
text.indexOf(
|
||||||
|
"nn.nn\t10.52\n"
|
||||||
|
) > -1
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
text.indexOf(
|
||||||
|
"nn.nnn\t10.520\n"
|
||||||
|
) > -1
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
text.indexOf(
|
||||||
|
"£nn.nn\t£10.52\n"
|
||||||
|
) > -1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Embded in a non-excel file
|
* Embeded in a non-excel file
|
||||||
*/
|
*/
|
||||||
public void testWithEmbeded() throws Exception {
|
public void testWithEmbeded() throws Exception {
|
||||||
POIFSFileSystem fs = new POIFSFileSystem(
|
POIFSFileSystem fs = new POIFSFileSystem(
|
||||||
|
@ -309,6 +309,63 @@ public final class TestHSSFDataFormatter extends TestCase {
|
|||||||
|
|
||||||
assertEquals("2345", f.formatCellValue(cellA1));
|
assertEquals("2345", f.formatCellValue(cellA1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests various formattings of dates and numbers
|
||||||
|
*/
|
||||||
|
public void testFromFile() {
|
||||||
|
HSSFWorkbook workbook = HSSFTestDataSamples.openSampleWorkbook("Formatting.xls");
|
||||||
|
HSSFSheet sheet = workbook.getSheetAt(0);
|
||||||
|
|
||||||
|
HSSFDataFormatter f = new HSSFDataFormatter();
|
||||||
|
|
||||||
|
// This one is one of the nasty auto-locale changing ones...
|
||||||
|
assertEquals("dd/mm/yyyy", sheet.getRow(1).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("m/d/yy", sheet.getRow(1).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("11/24/06", f.formatCellValue(sheet.getRow(1).getCell(1)));
|
||||||
|
|
||||||
|
assertEquals("yyyy/mm/dd", sheet.getRow(2).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("yyyy/mm/dd", sheet.getRow(2).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("2006/11/24", f.formatCellValue(sheet.getRow(2).getCell(1)));
|
||||||
|
|
||||||
|
assertEquals("yyyy-mm-dd", sheet.getRow(3).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("yyyy\\-mm\\-dd", sheet.getRow(3).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("2006-11-24", f.formatCellValue(sheet.getRow(3).getCell(1)));
|
||||||
|
|
||||||
|
assertEquals("yy/mm/dd", sheet.getRow(4).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("yy/mm/dd", sheet.getRow(4).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("06/11/24", f.formatCellValue(sheet.getRow(4).getCell(1)));
|
||||||
|
|
||||||
|
// Another builtin fun one
|
||||||
|
assertEquals("dd/mm/yy", sheet.getRow(5).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("d/m/yy;@", sheet.getRow(5).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("24/11/06", f.formatCellValue(sheet.getRow(5).getCell(1)));
|
||||||
|
|
||||||
|
assertEquals("dd-mm-yy", sheet.getRow(6).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("dd\\-mm\\-yy", sheet.getRow(6).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("24-11-06", f.formatCellValue(sheet.getRow(6).getCell(1)));
|
||||||
|
|
||||||
|
|
||||||
|
// Another builtin fun one
|
||||||
|
assertEquals("nn.nn", sheet.getRow(9).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("General", sheet.getRow(9).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("10.52", f.formatCellValue(sheet.getRow(9).getCell(1)));
|
||||||
|
|
||||||
|
// text isn't quite the format rule...
|
||||||
|
assertEquals("nn.nnn", sheet.getRow(10).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("0.000", sheet.getRow(10).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("10.520", f.formatCellValue(sheet.getRow(10).getCell(1)));
|
||||||
|
|
||||||
|
// text isn't quite the format rule...
|
||||||
|
assertEquals("nn.n", sheet.getRow(11).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("0.0", sheet.getRow(11).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("10.5", f.formatCellValue(sheet.getRow(11).getCell(1)));
|
||||||
|
|
||||||
|
// text isn't quite the format rule...
|
||||||
|
assertEquals("£nn.nn", sheet.getRow(12).getCell(0).getStringCellValue());
|
||||||
|
assertEquals("\"£\"#,##0.00", sheet.getRow(12).getCell(1).getCellStyle().getDataFormatString());
|
||||||
|
assertEquals("£10.52", f.formatCellValue(sheet.getRow(12).getCell(1)));
|
||||||
|
}
|
||||||
|
|
||||||
private static void log(String msg) {
|
private static void log(String msg) {
|
||||||
if (false) { // successful tests should be silent
|
if (false) { // successful tests should be silent
|
||||||
|
Loading…
Reference in New Issue
Block a user