More on converting the excel extractor to the new code

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635030 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-03-08 18:06:41 +00:00
parent b950c116dc
commit 36df1396ed
4 changed files with 51 additions and 33 deletions

View File

@ -18,12 +18,14 @@ package org.apache.poi.xssf.extractor;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlException;
@ -92,17 +94,21 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
for (Object rawR : sheet) { for (Object rawR : sheet) {
Row row = (Row)rawR; Row row = (Row)rawR;
for (Object rawC: row) { for(Iterator ri = row.cellIterator(); ri.hasNext();) {
Cell cell = (Cell)rawC; Cell cell = (Cell)ri.next();
// Is it a formula one? // Is it a formula one?
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) { if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
text.append(cell.getCellFormula()); text.append(cell.getCellFormula());
} else if(cell.getCellType() == Cell.CELL_TYPE_STRING) {
text.append(cell.getRichStringCellValue().getString());
} else { } else {
text.append(cell.toString()); XSSFCell xc = (XSSFCell)cell;
text.append(xc.getRawValue());
} }
text.append(","); if(ri.hasNext())
text.append("\t");
} }
text.append("\n"); text.append("\n");
} }

View File

@ -78,7 +78,7 @@ public class XSSFCell implements Cell {
} }
public String getCellFormula() { public String getCellFormula() {
if (STCellType.STR != cell.getT()) { if(this.cell.getF() == null) {
throw new NumberFormatException("You cannot get a formula from a non-formula cell"); throw new NumberFormatException("You cannot get a formula from a non-formula cell");
} }
return this.cell.getF().getStringValue(); return this.cell.getF().getStringValue();
@ -94,6 +94,12 @@ public class XSSFCell implements Cell {
} }
public int getCellType() { public int getCellType() {
// Detecting formulas is quite pesky,
// as they don't get their type set
if(this.cell.getF() != null) {
return CELL_TYPE_FORMULA;
}
switch (this.cell.getT().intValue()) { switch (this.cell.getT().intValue()) {
case STCellType.INT_B: case STCellType.INT_B:
return CELL_TYPE_BOOLEAN; return CELL_TYPE_BOOLEAN;
@ -290,6 +296,13 @@ public class XSSFCell implements Cell {
return "[" + this.row.getRowNum() + "," + this.getCellNum() + "] " + this.cell.getV(); return "[" + this.row.getRowNum() + "," + this.getCellNum() + "] " + this.cell.getV();
} }
/**
 * Exposes the value held by the underlying OOXML cell bean exactly as it
 * is stored, with no type-specific conversion or formatting applied.
 *
 * @return whatever {@code getV()} yields for the wrapped cell
 *  (NOTE(review): presumably {@code null} when the cell stores no
 *  value - confirm against the schema bean)
 */
public String getRawValue() {
final String rawValue = this.cell.getV();
return rawValue;
}
/** /**
* @throws RuntimeException if the bounds are exceeded. * @throws RuntimeException if the bounds are exceeded.
*/ */

View File

@ -79,6 +79,9 @@ public class XSSFRichTextString implements RichTextString {
public String getString() { public String getString() {
return string; return string;
} }
/**
 * Returns the plain text of this rich text string, so that implicit
 * string conversion (concatenation, {@code StringBuffer.append(Object)})
 * yields the text rather than the default {@code Object} identity string.
 *
 * @return the same backing text that {@code getString()} returns
 */
@Override
public String toString() {
return string;
}
public int length() { public int length() {
return string.length(); return string.length();

View File

@ -35,59 +35,56 @@ public class TestXSSFExcelExtractor extends TestCase {
/** /**
* A very simple file * A very simple file
*/ */
private XSSFWorkbook xmlA; private File xmlA;
private File fileA;
/** /**
* A fairly complex file * A fairly complex file
*/ */
private XSSFWorkbook xmlB; private File xmlB;
/** /**
* A fairly simple file - ooxml * A fairly simple file - ooxml
*/ */
private XSSFWorkbook simpleXLSX; private File simpleXLSX;
/** /**
* A fairly simple file - ole2 * A fairly simple file - ole2
*/ */
private HSSFWorkbook simpleXLS; private File simpleXLS;
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
fileA = new File( xmlA = new File(
System.getProperty("HSSF.testdata.path") + System.getProperty("HSSF.testdata.path") +
File.separator + "sample.xlsx" File.separator + "sample.xlsx"
); );
File fileB = new File( assertTrue(xmlA.exists());
xmlB = new File(
System.getProperty("HSSF.testdata.path") + System.getProperty("HSSF.testdata.path") +
File.separator + "AverageTaxRates.xlsx" File.separator + "AverageTaxRates.xlsx"
); );
assertTrue(xmlB.exists());
File fileSOOXML = new File( simpleXLSX = new File(
System.getProperty("HSSF.testdata.path") + System.getProperty("HSSF.testdata.path") +
File.separator + "SampleSS.xlsx" File.separator + "SampleSS.xlsx"
); );
File fileSOLE2 = new File( simpleXLS = new File(
System.getProperty("HSSF.testdata.path") + System.getProperty("HSSF.testdata.path") +
File.separator + "SampleSS.xls" File.separator + "SampleSS.xls"
); );
assertTrue(simpleXLS.exists());
xmlA = new XSSFWorkbook(fileA.toString()); assertTrue(simpleXLSX.exists());
xmlB = new XSSFWorkbook(fileB.toString());
simpleXLSX = new XSSFWorkbook(fileSOOXML.toString());
simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
} }
/** /**
* Get text out of the simple file * Get text out of the simple file
*/ */
public void testGetSimpleText() throws Exception { public void testGetSimpleText() throws Exception {
new XSSFExcelExtractor(fileA.toString()); new XSSFExcelExtractor(xmlA.toString());
new XSSFExcelExtractor(xmlA); new XSSFExcelExtractor(new XSSFWorkbook(xmlA.toString()));
XSSFExcelExtractor extractor = XSSFExcelExtractor extractor =
new XSSFExcelExtractor(xmlA); new XSSFExcelExtractor(xmlA.toString());
extractor.getText(); extractor.getText();
String text = extractor.getText(); String text = extractor.getText();
@ -110,8 +107,7 @@ public class TestXSSFExcelExtractor extends TestCase {
"adipiscing\t777\n" + "adipiscing\t777\n" +
"elit\t888\n" + "elit\t888\n" +
"Nunc\t999\n" + "Nunc\t999\n" +
"at\t4995\n" + "at\t4995\n", text);
"\n\n", text);
// Now get formulas not their values // Now get formulas not their values
extractor.setFormulasNotResults(true); extractor.setFormulasNotResults(true);
@ -126,8 +122,7 @@ public class TestXSSFExcelExtractor extends TestCase {
"adipiscing\t777\n" + "adipiscing\t777\n" +
"elit\t888\n" + "elit\t888\n" +
"Nunc\t999\n" + "Nunc\t999\n" +
"at\tSUM(B1:B9)\n" + "at\tSUM(B1:B9)\n", text);
"\n\n", text);
// With sheet names too // With sheet names too
extractor.setIncludeSheetNames(true); extractor.setIncludeSheetNames(true);
@ -143,17 +138,17 @@ public class TestXSSFExcelExtractor extends TestCase {
"adipiscing\t777\n" + "adipiscing\t777\n" +
"elit\t888\n" + "elit\t888\n" +
"Nunc\t999\n" + "Nunc\t999\n" +
"at\tSUM(B1:B9)\n\n" + "at\tSUM(B1:B9)\n" +
"Sheet2\n\n" + "Sheet2\n" +
"Sheet3\n" "Sheet3\n"
, text); , text);
} }
public void testGetComplexText() throws Exception { public void testGetComplexText() throws Exception {
new XSSFExcelExtractor(xmlB); new XSSFExcelExtractor(xmlB.toString());
XSSFExcelExtractor extractor = XSSFExcelExtractor extractor =
new XSSFExcelExtractor(xmlB); new XSSFExcelExtractor(new XSSFWorkbook(xmlB.toString()));
extractor.getText(); extractor.getText();
String text = extractor.getText(); String text = extractor.getText();
@ -174,9 +169,10 @@ public class TestXSSFExcelExtractor extends TestCase {
*/ */
public void testComparedToOLE2() throws Exception { public void testComparedToOLE2() throws Exception {
XSSFExcelExtractor ooxmlExtractor = XSSFExcelExtractor ooxmlExtractor =
new XSSFExcelExtractor(simpleXLSX); new XSSFExcelExtractor(simpleXLSX.toString());
ExcelExtractor ole2Extractor = ExcelExtractor ole2Extractor =
new ExcelExtractor(simpleXLS); new ExcelExtractor(new HSSFWorkbook(
new FileInputStream(simpleXLS)));
POITextExtractor[] extractors = POITextExtractor[] extractors =
new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; new POITextExtractor[] { ooxmlExtractor, ole2Extractor };