Start updating the excel extractor to the new style code
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635026 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9a8ee629c4
commit
b950c116dc
@ -21,7 +21,7 @@
|
||||
|
||||
<document>
|
||||
<header>
|
||||
<title>Busy Developers' Guide to HSSF Features</title>
|
||||
<title>Busy Developers' Guide to HSSF and XSSF Features</title>
|
||||
<authors>
|
||||
<person email="user@poi.apache.org" name="Glen Stampoultzis" id="CO"/>
|
||||
<person email="user@poi.apache.org" name="Yegor Kozlov" id="YK"/>
|
||||
@ -30,8 +30,9 @@
|
||||
<body>
|
||||
<section><title>Busy Developers' Guide to Features</title>
|
||||
<p>
|
||||
Want to use HSSF read and write spreadsheets in a hurry? This guide is for you. If you're after
|
||||
more in-depth coverage of the HSSF user-API please consult the <link href="how-to.html">HOWTO</link>
|
||||
Want to use HSSF and XSSF to read and write spreadsheets in a hurry? This
|
||||
guide is for you. If you're after more in-depth coverage of the HSSF and
|
||||
XSSF user-APIs, please consult the <link href="how-to.html">HOWTO</link>
|
||||
guide as it contains actual descriptions of how to use this stuff.
|
||||
</p>
|
||||
<section><title>Index of Features</title>
|
||||
|
@ -46,18 +46,33 @@ public abstract class POIXMLDocument {
|
||||
|
||||
protected POIXMLDocument() {}
|
||||
|
||||
protected POIXMLDocument(String path) throws IOException {
|
||||
try {
|
||||
this.pkg = Package.open(path);
|
||||
PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
|
||||
PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
|
||||
|
||||
// Get core part
|
||||
this.corePart = this.pkg.getPart(coreDocRelationship);
|
||||
} catch (InvalidFormatException e) {
|
||||
throw new IOException(e.toString());
|
||||
protected POIXMLDocument(Package pkg) throws IOException {
|
||||
try {
|
||||
this.pkg = pkg;
|
||||
|
||||
PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
|
||||
PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
|
||||
|
||||
// Get core part
|
||||
this.corePart = this.pkg.getPart(coreDocRelationship);
|
||||
} catch (OpenXML4JException e) {
|
||||
throw new IOException(e.toString());
|
||||
}
|
||||
}
|
||||
protected POIXMLDocument(String path) throws IOException {
|
||||
this(openPackage(path));
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper to open a package, throwing an IOException
|
||||
* in the event of a problem.
|
||||
* Works around shortcomings in Java's this() constructor calls
|
||||
*/
|
||||
protected static Package openPackage(String path) throws IOException {
|
||||
try {
|
||||
return Package.open(path);
|
||||
} catch (InvalidFormatException e) {
|
||||
throw new IOException(e.toString());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,11 @@ import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxml4j.exceptions.OpenXML4JException;
|
||||
@ -33,10 +38,13 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
|
||||
* Helper class to extract text from an OOXML Excel file
|
||||
*/
|
||||
public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
||||
private XSSFWorkbook workbook;
|
||||
private Workbook workbook;
|
||||
private boolean includeSheetNames = true;
|
||||
private boolean formulasNotResults = false;
|
||||
|
||||
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||
this(new XSSFWorkbook(path));
|
||||
}
|
||||
public XSSFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
||||
this(new XSSFWorkbook(container));
|
||||
}
|
||||
@ -52,9 +60,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
||||
System.exit(1);
|
||||
}
|
||||
POIXMLTextExtractor extractor =
|
||||
new HXFExcelExtractor(HXFDocument.openPackage(
|
||||
new File(args[0])
|
||||
));
|
||||
new XSSFExcelExtractor(args[0]);
|
||||
System.out.println(extractor.getText());
|
||||
}
|
||||
|
||||
@ -78,48 +84,27 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
||||
public String getText() {
|
||||
StringBuffer text = new StringBuffer();
|
||||
|
||||
CTSheet[] sheetRefs =
|
||||
workbook._getHSSFXML().getSheetReferences().getSheetArray();
|
||||
for(int i=0; i<sheetRefs.length; i++) {
|
||||
try {
|
||||
CTWorksheet sheet =
|
||||
workbook._getHSSFXML().getSheet(sheetRefs[i]);
|
||||
CTRow[] rows =
|
||||
sheet.getSheetData().getRowArray();
|
||||
|
||||
if(i > 0) {
|
||||
text.append("\n");
|
||||
}
|
||||
if(includeSheetNames) {
|
||||
text.append(sheetRefs[i].getName() + "\n");
|
||||
}
|
||||
|
||||
for(int j=0; j<rows.length; j++) {
|
||||
CTCell[] cells = rows[j].getCArray();
|
||||
for(int k=0; k<cells.length; k++) {
|
||||
CTCell cell = cells[k];
|
||||
if(k > 0) {
|
||||
text.append("\t");
|
||||
}
|
||||
|
||||
boolean done = false;
|
||||
|
||||
// Is it a formula one?
|
||||
if(cell.getF() != null) {
|
||||
if(formulasNotResults) {
|
||||
text.append(cell.getF().getStringValue());
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
if(!done) {
|
||||
HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
|
||||
text.append(uCell.getStringValue());
|
||||
}
|
||||
for(int i=0; i<workbook.getNumberOfSheets(); i++) {
|
||||
Sheet sheet = workbook.getSheetAt(i);
|
||||
if(includeSheetNames) {
|
||||
text.append(workbook.getSheetName(i) + "\n");
|
||||
}
|
||||
|
||||
for (Object rawR : sheet) {
|
||||
Row row = (Row)rawR;
|
||||
for (Object rawC: row) {
|
||||
Cell cell = (Cell)rawC;
|
||||
|
||||
// Is it a formula one?
|
||||
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
|
||||
text.append(cell.getCellFormula());
|
||||
} else {
|
||||
text.append(cell.toString());
|
||||
}
|
||||
text.append("\n");
|
||||
|
||||
text.append(",");
|
||||
}
|
||||
} catch(Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
text.append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -89,7 +89,10 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook {
|
||||
}
|
||||
|
||||
public XSSFWorkbook(String path) throws IOException {
|
||||
super(path);
|
||||
this(openPackage(path));
|
||||
}
|
||||
public XSSFWorkbook(Package pkg) throws IOException {
|
||||
super(pkg);
|
||||
try {
|
||||
WorkbookDocument doc = WorkbookDocument.Factory.parse(getCorePart().getInputStream());
|
||||
this.workbook = doc.getWorkbook();
|
||||
|
@ -14,7 +14,7 @@
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hssf.extractor;
|
||||
package org.apache.poi.xssf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
@ -24,28 +24,28 @@ import java.util.regex.Pattern;
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.hssf.HSSFXML;
|
||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
|
||||
import org.apache.poi.hxf.HXFDocument;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
|
||||
/**
|
||||
* Tests for HXFExcelExtractor
|
||||
* Tests for XSSFExcelExtractor
|
||||
*/
|
||||
public class TestHXFExcelExtractor extends TestCase {
|
||||
public class TestXSSFExcelExtractor extends TestCase {
|
||||
/**
|
||||
* A very simple file
|
||||
*/
|
||||
private HSSFXML xmlA;
|
||||
private XSSFWorkbook xmlA;
|
||||
private File fileA;
|
||||
/**
|
||||
* A fairly complex file
|
||||
*/
|
||||
private HSSFXML xmlB;
|
||||
private XSSFWorkbook xmlB;
|
||||
|
||||
/**
|
||||
* A fairly simple file - ooxml
|
||||
*/
|
||||
private HSSFXML simpleXLSX;
|
||||
private XSSFWorkbook simpleXLSX;
|
||||
/**
|
||||
* A fairly simple file - ole2
|
||||
*/
|
||||
@ -54,7 +54,7 @@ public class TestHXFExcelExtractor extends TestCase {
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
File fileA = new File(
|
||||
fileA = new File(
|
||||
System.getProperty("HSSF.testdata.path") +
|
||||
File.separator + "sample.xlsx"
|
||||
);
|
||||
@ -72,10 +72,10 @@ public class TestHXFExcelExtractor extends TestCase {
|
||||
File.separator + "SampleSS.xls"
|
||||
);
|
||||
|
||||
xmlA = new HSSFXML(HXFDocument.openPackage(fileA));
|
||||
xmlB = new HSSFXML(HXFDocument.openPackage(fileB));
|
||||
xmlA = new XSSFWorkbook(fileA.toString());
|
||||
xmlB = new XSSFWorkbook(fileB.toString());
|
||||
|
||||
simpleXLSX = new HSSFXML(HXFDocument.openPackage(fileSOOXML));
|
||||
simpleXLSX = new XSSFWorkbook(fileSOOXML.toString());
|
||||
simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
|
||||
}
|
||||
|
||||
@ -83,11 +83,11 @@ public class TestHXFExcelExtractor extends TestCase {
|
||||
* Get text out of the simple file
|
||||
*/
|
||||
public void testGetSimpleText() throws Exception {
|
||||
new HXFExcelExtractor(xmlA.getPackage());
|
||||
new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA));
|
||||
new XSSFExcelExtractor(fileA.toString());
|
||||
new XSSFExcelExtractor(xmlA);
|
||||
|
||||
HXFExcelExtractor extractor =
|
||||
new HXFExcelExtractor(xmlA.getPackage());
|
||||
XSSFExcelExtractor extractor =
|
||||
new XSSFExcelExtractor(xmlA);
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
||||
@ -150,11 +150,10 @@ public class TestHXFExcelExtractor extends TestCase {
|
||||
}
|
||||
|
||||
public void testGetComplexText() throws Exception {
|
||||
new HXFExcelExtractor(xmlB.getPackage());
|
||||
new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
|
||||
new XSSFExcelExtractor(xmlB);
|
||||
|
||||
HXFExcelExtractor extractor =
|
||||
new HXFExcelExtractor(xmlB.getPackage());
|
||||
XSSFExcelExtractor extractor =
|
||||
new XSSFExcelExtractor(xmlB);
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
||||
@ -174,8 +173,8 @@ public class TestHXFExcelExtractor extends TestCase {
|
||||
* the same file, just saved as xls and xlsx
|
||||
*/
|
||||
public void testComparedToOLE2() throws Exception {
|
||||
HXFExcelExtractor ooxmlExtractor =
|
||||
new HXFExcelExtractor(simpleXLSX.getPackage());
|
||||
XSSFExcelExtractor ooxmlExtractor =
|
||||
new XSSFExcelExtractor(simpleXLSX);
|
||||
ExcelExtractor ole2Extractor =
|
||||
new ExcelExtractor(simpleXLS);
|
||||
|
Loading…
Reference in New Issue
Block a user