More on converting the excel extractor to the new code
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635030 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b950c116dc
commit
36df1396ed
@ -18,12 +18,14 @@ package org.apache.poi.xssf.extractor;
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
import org.apache.poi.ss.usermodel.Cell;
|
import org.apache.poi.ss.usermodel.Cell;
|
||||||
import org.apache.poi.ss.usermodel.Row;
|
import org.apache.poi.ss.usermodel.Row;
|
||||||
import org.apache.poi.ss.usermodel.Sheet;
|
import org.apache.poi.ss.usermodel.Sheet;
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFCell;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
@ -92,17 +94,21 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
|||||||
|
|
||||||
for (Object rawR : sheet) {
|
for (Object rawR : sheet) {
|
||||||
Row row = (Row)rawR;
|
Row row = (Row)rawR;
|
||||||
for (Object rawC: row) {
|
for(Iterator ri = row.cellIterator(); ri.hasNext();) {
|
||||||
Cell cell = (Cell)rawC;
|
Cell cell = (Cell)ri.next();
|
||||||
|
|
||||||
// Is it a formula one?
|
// Is it a formula one?
|
||||||
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
|
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
|
||||||
text.append(cell.getCellFormula());
|
text.append(cell.getCellFormula());
|
||||||
|
} else if(cell.getCellType() == Cell.CELL_TYPE_STRING) {
|
||||||
|
text.append(cell.getRichStringCellValue().getString());
|
||||||
} else {
|
} else {
|
||||||
text.append(cell.toString());
|
XSSFCell xc = (XSSFCell)cell;
|
||||||
|
text.append(xc.getRawValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
text.append(",");
|
if(ri.hasNext())
|
||||||
|
text.append("\t");
|
||||||
}
|
}
|
||||||
text.append("\n");
|
text.append("\n");
|
||||||
}
|
}
|
||||||
|
@ -78,7 +78,7 @@ public class XSSFCell implements Cell {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String getCellFormula() {
|
public String getCellFormula() {
|
||||||
if (STCellType.STR != cell.getT()) {
|
if(this.cell.getF() == null) {
|
||||||
throw new NumberFormatException("You cannot get a formula from a non-formula cell");
|
throw new NumberFormatException("You cannot get a formula from a non-formula cell");
|
||||||
}
|
}
|
||||||
return this.cell.getF().getStringValue();
|
return this.cell.getF().getStringValue();
|
||||||
@ -94,6 +94,12 @@ public class XSSFCell implements Cell {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public int getCellType() {
|
public int getCellType() {
|
||||||
|
// Detecting formulas is quite pesky,
|
||||||
|
// as they don't get their type set
|
||||||
|
if(this.cell.getF() != null) {
|
||||||
|
return CELL_TYPE_FORMULA;
|
||||||
|
}
|
||||||
|
|
||||||
switch (this.cell.getT().intValue()) {
|
switch (this.cell.getT().intValue()) {
|
||||||
case STCellType.INT_B:
|
case STCellType.INT_B:
|
||||||
return CELL_TYPE_BOOLEAN;
|
return CELL_TYPE_BOOLEAN;
|
||||||
@ -290,6 +296,13 @@ public class XSSFCell implements Cell {
|
|||||||
return "[" + this.row.getRowNum() + "," + this.getCellNum() + "] " + this.cell.getV();
|
return "[" + this.row.getRowNum() + "," + this.getCellNum() + "] " + this.cell.getV();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the raw, underlying ooxml value for the cell
|
||||||
|
*/
|
||||||
|
public String getRawValue() {
|
||||||
|
return this.cell.getV();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @throws RuntimeException if the bounds are exceeded.
|
* @throws RuntimeException if the bounds are exceeded.
|
||||||
*/
|
*/
|
||||||
|
@ -79,6 +79,9 @@ public class XSSFRichTextString implements RichTextString {
|
|||||||
public String getString() {
|
public String getString() {
|
||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
|
public String toString() {
|
||||||
|
return string;
|
||||||
|
}
|
||||||
|
|
||||||
public int length() {
|
public int length() {
|
||||||
return string.length();
|
return string.length();
|
||||||
|
@ -35,59 +35,56 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||||||
/**
|
/**
|
||||||
* A very simple file
|
* A very simple file
|
||||||
*/
|
*/
|
||||||
private XSSFWorkbook xmlA;
|
private File xmlA;
|
||||||
private File fileA;
|
|
||||||
/**
|
/**
|
||||||
* A fairly complex file
|
* A fairly complex file
|
||||||
*/
|
*/
|
||||||
private XSSFWorkbook xmlB;
|
private File xmlB;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A fairly simple file - ooxml
|
* A fairly simple file - ooxml
|
||||||
*/
|
*/
|
||||||
private XSSFWorkbook simpleXLSX;
|
private File simpleXLSX;
|
||||||
/**
|
/**
|
||||||
* A fairly simple file - ole2
|
* A fairly simple file - ole2
|
||||||
*/
|
*/
|
||||||
private HSSFWorkbook simpleXLS;
|
private File simpleXLS;
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
|
|
||||||
fileA = new File(
|
xmlA = new File(
|
||||||
System.getProperty("HSSF.testdata.path") +
|
System.getProperty("HSSF.testdata.path") +
|
||||||
File.separator + "sample.xlsx"
|
File.separator + "sample.xlsx"
|
||||||
);
|
);
|
||||||
File fileB = new File(
|
assertTrue(xmlA.exists());
|
||||||
|
xmlB = new File(
|
||||||
System.getProperty("HSSF.testdata.path") +
|
System.getProperty("HSSF.testdata.path") +
|
||||||
File.separator + "AverageTaxRates.xlsx"
|
File.separator + "AverageTaxRates.xlsx"
|
||||||
);
|
);
|
||||||
|
assertTrue(xmlB.exists());
|
||||||
|
|
||||||
File fileSOOXML = new File(
|
simpleXLSX = new File(
|
||||||
System.getProperty("HSSF.testdata.path") +
|
System.getProperty("HSSF.testdata.path") +
|
||||||
File.separator + "SampleSS.xlsx"
|
File.separator + "SampleSS.xlsx"
|
||||||
);
|
);
|
||||||
File fileSOLE2 = new File(
|
simpleXLS = new File(
|
||||||
System.getProperty("HSSF.testdata.path") +
|
System.getProperty("HSSF.testdata.path") +
|
||||||
File.separator + "SampleSS.xls"
|
File.separator + "SampleSS.xls"
|
||||||
);
|
);
|
||||||
|
assertTrue(simpleXLS.exists());
|
||||||
xmlA = new XSSFWorkbook(fileA.toString());
|
assertTrue(simpleXLSX.exists());
|
||||||
xmlB = new XSSFWorkbook(fileB.toString());
|
|
||||||
|
|
||||||
simpleXLSX = new XSSFWorkbook(fileSOOXML.toString());
|
|
||||||
simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get text out of the simple file
|
* Get text out of the simple file
|
||||||
*/
|
*/
|
||||||
public void testGetSimpleText() throws Exception {
|
public void testGetSimpleText() throws Exception {
|
||||||
new XSSFExcelExtractor(fileA.toString());
|
new XSSFExcelExtractor(xmlA.toString());
|
||||||
new XSSFExcelExtractor(xmlA);
|
new XSSFExcelExtractor(new XSSFWorkbook(xmlA.toString()));
|
||||||
|
|
||||||
XSSFExcelExtractor extractor =
|
XSSFExcelExtractor extractor =
|
||||||
new XSSFExcelExtractor(xmlA);
|
new XSSFExcelExtractor(xmlA.toString());
|
||||||
extractor.getText();
|
extractor.getText();
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
@ -110,8 +107,7 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||||||
"adipiscing\t777\n" +
|
"adipiscing\t777\n" +
|
||||||
"elit\t888\n" +
|
"elit\t888\n" +
|
||||||
"Nunc\t999\n" +
|
"Nunc\t999\n" +
|
||||||
"at\t4995\n" +
|
"at\t4995\n", text);
|
||||||
"\n\n", text);
|
|
||||||
|
|
||||||
// Now get formulas not their values
|
// Now get formulas not their values
|
||||||
extractor.setFormulasNotResults(true);
|
extractor.setFormulasNotResults(true);
|
||||||
@ -126,8 +122,7 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||||||
"adipiscing\t777\n" +
|
"adipiscing\t777\n" +
|
||||||
"elit\t888\n" +
|
"elit\t888\n" +
|
||||||
"Nunc\t999\n" +
|
"Nunc\t999\n" +
|
||||||
"at\tSUM(B1:B9)\n" +
|
"at\tSUM(B1:B9)\n", text);
|
||||||
"\n\n", text);
|
|
||||||
|
|
||||||
// With sheet names too
|
// With sheet names too
|
||||||
extractor.setIncludeSheetNames(true);
|
extractor.setIncludeSheetNames(true);
|
||||||
@ -143,17 +138,17 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||||||
"adipiscing\t777\n" +
|
"adipiscing\t777\n" +
|
||||||
"elit\t888\n" +
|
"elit\t888\n" +
|
||||||
"Nunc\t999\n" +
|
"Nunc\t999\n" +
|
||||||
"at\tSUM(B1:B9)\n\n" +
|
"at\tSUM(B1:B9)\n" +
|
||||||
"Sheet2\n\n" +
|
"Sheet2\n" +
|
||||||
"Sheet3\n"
|
"Sheet3\n"
|
||||||
, text);
|
, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetComplexText() throws Exception {
|
public void testGetComplexText() throws Exception {
|
||||||
new XSSFExcelExtractor(xmlB);
|
new XSSFExcelExtractor(xmlB.toString());
|
||||||
|
|
||||||
XSSFExcelExtractor extractor =
|
XSSFExcelExtractor extractor =
|
||||||
new XSSFExcelExtractor(xmlB);
|
new XSSFExcelExtractor(new XSSFWorkbook(xmlB.toString()));
|
||||||
extractor.getText();
|
extractor.getText();
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
@ -174,9 +169,10 @@ public class TestXSSFExcelExtractor extends TestCase {
|
|||||||
*/
|
*/
|
||||||
public void testComparedToOLE2() throws Exception {
|
public void testComparedToOLE2() throws Exception {
|
||||||
XSSFExcelExtractor ooxmlExtractor =
|
XSSFExcelExtractor ooxmlExtractor =
|
||||||
new XSSFExcelExtractor(simpleXLSX);
|
new XSSFExcelExtractor(simpleXLSX.toString());
|
||||||
ExcelExtractor ole2Extractor =
|
ExcelExtractor ole2Extractor =
|
||||||
new ExcelExtractor(simpleXLS);
|
new ExcelExtractor(new HSSFWorkbook(
|
||||||
|
new FileInputStream(simpleXLS)));
|
||||||
|
|
||||||
POITextExtractor[] extractors =
|
POITextExtractor[] extractors =
|
||||||
new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
|
new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
|
||||||
|
Loading…
Reference in New Issue
Block a user