* Verify some more Text-Extraction features as part of integration tests, fix some NullPointerExceptions that showed up now because the event-based extraction does not have a Document available
Also handle an XLSX file which does not have row numbers in the sheet XML. Excel can read it, so it makes sense to also allow reading it in the XSSFSheetXMLHandler * Remove some Eclipse warnings in test-code git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1662691 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
80d0d3b5bd
commit
f043c44017
@ -16,15 +16,23 @@
|
|||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.stress;
|
package org.apache.poi.stress;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.extractor.ExtractorFactory;
|
import org.apache.poi.extractor.ExtractorFactory;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||||
|
import org.apache.xmlbeans.XmlException;
|
||||||
|
|
||||||
public abstract class AbstractFileHandler implements FileHandler {
|
public abstract class AbstractFileHandler implements FileHandler {
|
||||||
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>();
|
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>();
|
||||||
@ -48,6 +56,22 @@ public abstract class AbstractFileHandler implements FileHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void handleExtracting(File file) throws Exception {
|
public void handleExtracting(File file) throws Exception {
|
||||||
|
boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
|
||||||
|
try {
|
||||||
|
ExtractorFactory.setThreadPrefersEventExtractors(true);
|
||||||
|
handleExtractingInternal(file);
|
||||||
|
|
||||||
|
ExtractorFactory.setThreadPrefersEventExtractors(false);
|
||||||
|
handleExtractingInternal(file);
|
||||||
|
} finally {
|
||||||
|
ExtractorFactory.setThreadPrefersEventExtractors(before);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleExtractingInternal(File file) throws Exception {
|
||||||
|
long length = file.length();
|
||||||
|
long modified = file.lastModified();
|
||||||
|
|
||||||
POITextExtractor extractor = ExtractorFactory.createExtractor(file);
|
POITextExtractor extractor = ExtractorFactory.createExtractor(file);
|
||||||
try {
|
try {
|
||||||
assertNotNull(extractor);
|
assertNotNull(extractor);
|
||||||
@ -60,6 +84,11 @@ public abstract class AbstractFileHandler implements FileHandler {
|
|||||||
|
|
||||||
assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
|
assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
|
||||||
EXPECTED_EXTRACTOR_FAILURES.contains(file));
|
EXPECTED_EXTRACTOR_FAILURES.contains(file));
|
||||||
|
|
||||||
|
assertEquals("File should not be modified by extractor", length, file.length());
|
||||||
|
assertEquals("File should not be modified by extractor", modified, file.lastModified());
|
||||||
|
|
||||||
|
handleExtractingAsStream(file);
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
|
if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
|
||||||
throw new Exception("While handling " + file, e);
|
throw new Exception("While handling " + file, e);
|
||||||
@ -68,4 +97,22 @@ public abstract class AbstractFileHandler implements FileHandler {
|
|||||||
extractor.close();
|
extractor.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void handleExtractingAsStream(File file) throws FileNotFoundException,
|
||||||
|
IOException, InvalidFormatException, OpenXML4JException,
|
||||||
|
XmlException {
|
||||||
|
InputStream stream = new FileInputStream(file);
|
||||||
|
try {
|
||||||
|
POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream);
|
||||||
|
try {
|
||||||
|
assertNotNull(streamExtractor);
|
||||||
|
|
||||||
|
assertNotNull(streamExtractor.getText());
|
||||||
|
} finally {
|
||||||
|
streamExtractor.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
stream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@ package org.apache.poi.stress;
|
|||||||
|
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
|
||||||
@ -43,4 +44,10 @@ public class HPSFFileHandler extends AbstractFileHandler {
|
|||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
|
@Test
|
||||||
|
public void testExtractor() throws Exception {
|
||||||
|
handleExtracting(new File("test-data/hpsf/TestBug44375.xls"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
package org.apache.poi.stress;
|
package org.apache.poi.stress;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
|
||||||
@ -71,4 +72,10 @@ public class XSSFFileHandler extends SpreadsheetHandler {
|
|||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
|
@Test
|
||||||
|
public void testExtractor() throws Exception {
|
||||||
|
handleExtracting(new File("test-data/spreadsheet/56278.xlsx"));
|
||||||
|
}
|
||||||
}
|
}
|
@ -57,6 +57,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String getDocumentSummaryInformationText() {
|
public String getDocumentSummaryInformationText() {
|
||||||
|
if(document == null) { // event based extractor does not have a document
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
DocumentSummaryInformation dsi = document.getDocumentSummaryInformation();
|
DocumentSummaryInformation dsi = document.getDocumentSummaryInformation();
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
|
|
||||||
@ -78,6 +82,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor {
|
|||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
public String getSummaryInformationText() {
|
public String getSummaryInformationText() {
|
||||||
|
if(document == null) { // event based extractor does not have a document
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
SummaryInformation si = document.getSummaryInformation();
|
SummaryInformation si = document.getSummaryInformation();
|
||||||
|
|
||||||
// Just normal properties
|
// Just normal properties
|
||||||
|
@ -19,6 +19,7 @@ package org.apache.poi.hssf.eventusermodel;
|
|||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.poi.hssf.eventusermodel.HSSFUserException;
|
import org.apache.poi.hssf.eventusermodel.HSSFUserException;
|
||||||
import org.apache.poi.hssf.record.*;
|
import org.apache.poi.hssf.record.*;
|
||||||
@ -56,11 +57,24 @@ public class HSSFEventFactory {
|
|||||||
* @param req an Instance of HSSFRequest which has your registered listeners
|
* @param req an Instance of HSSFRequest which has your registered listeners
|
||||||
* @param dir a DirectoryNode containing your workbook
|
* @param dir a DirectoryNode containing your workbook
|
||||||
*/
|
*/
|
||||||
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
|
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
|
||||||
InputStream in = dir.createDocumentInputStream("Workbook");
|
// some old documents have "WORKBOOK" or "BOOK"
|
||||||
|
final String name;
|
||||||
|
Set<String> entryNames = dir.getEntryNames();
|
||||||
|
if (entryNames.contains("Workbook")) {
|
||||||
|
name = "Workbook";
|
||||||
|
} else if (entryNames.contains("WORKBOOK")) {
|
||||||
|
name = "WORKBOOK";
|
||||||
|
} else if (entryNames.contains("BOOK")) {
|
||||||
|
name = "BOOK";
|
||||||
|
} else {
|
||||||
|
name = "Workbook";
|
||||||
|
}
|
||||||
|
|
||||||
processEvents(req, in);
|
InputStream in = dir.createDocumentInputStream(name);
|
||||||
}
|
|
||||||
|
processEvents(req, in);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Processes a file into essentially record events.
|
* Processes a file into essentially record events.
|
||||||
|
@ -67,9 +67,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
|
|||||||
* Returns the core document properties, eg author
|
* Returns the core document properties, eg author
|
||||||
*/
|
*/
|
||||||
public String getCorePropertiesText() {
|
public String getCorePropertiesText() {
|
||||||
|
POIXMLDocument document = getDocument();
|
||||||
|
if(document == null) { // event based extractor does not have a document
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
PackagePropertiesPart props =
|
PackagePropertiesPart props =
|
||||||
getDocument().getProperties().getCoreProperties().getUnderlyingProperties();
|
document.getProperties().getCoreProperties().getUnderlyingProperties();
|
||||||
|
|
||||||
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
|
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
|
||||||
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
|
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
|
||||||
@ -99,9 +104,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
|
|||||||
* application
|
* application
|
||||||
*/
|
*/
|
||||||
public String getExtendedPropertiesText() {
|
public String getExtendedPropertiesText() {
|
||||||
|
POIXMLDocument document = getDocument();
|
||||||
|
if(document == null) { // event based extractor does not have a document
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
|
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
|
||||||
props = getDocument().getProperties().getExtendedProperties().getUnderlyingProperties();
|
props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
|
||||||
|
|
||||||
appendIfPresent(text, "Application", props.getApplication());
|
appendIfPresent(text, "Application", props.getApplication());
|
||||||
appendIfPresent(text, "AppVersion", props.getAppVersion());
|
appendIfPresent(text, "AppVersion", props.getAppVersion());
|
||||||
@ -127,9 +137,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
|
|||||||
*/
|
*/
|
||||||
@SuppressWarnings("deprecation")
|
@SuppressWarnings("deprecation")
|
||||||
public String getCustomPropertiesText() {
|
public String getCustomPropertiesText() {
|
||||||
|
POIXMLDocument document = getDocument();
|
||||||
|
if(document == null) { // event based extractor does not have a document
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
StringBuilder text = new StringBuilder();
|
StringBuilder text = new StringBuilder();
|
||||||
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
|
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
|
||||||
props = getDocument().getProperties().getCustomProperties().getUnderlyingProperties();
|
props = document.getProperties().getCustomProperties().getUnderlyingProperties();
|
||||||
|
|
||||||
for(CTProperty property : props.getPropertyArray()) {
|
for(CTProperty property : props.getPropertyArray()) {
|
||||||
String val = "(not implemented!)";
|
String val = "(not implemented!)";
|
||||||
|
@ -265,10 +265,10 @@ public class ExtractorFactory {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an array of text extractors, one for each of
|
* Returns an array of text extractors, one for each of
|
||||||
* the embeded documents in the file (if there are any).
|
* the embedded documents in the file (if there are any).
|
||||||
* If there are no embeded documents, you'll get back an
|
* If there are no embedded documents, you'll get back an
|
||||||
* empty array. Otherwise, you'll get one open
|
* empty array. Otherwise, you'll get one open
|
||||||
* {@link POITextExtractor} for each embeded file.
|
* {@link POITextExtractor} for each embedded file.
|
||||||
*/
|
*/
|
||||||
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
||||||
// All the embedded directories we spotted
|
// All the embedded directories we spotted
|
||||||
|
@ -96,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
|||||||
private String formatString;
|
private String formatString;
|
||||||
private final DataFormatter formatter;
|
private final DataFormatter formatter;
|
||||||
private int rowNum;
|
private int rowNum;
|
||||||
|
private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well
|
||||||
private String cellRef;
|
private String cellRef;
|
||||||
private boolean formulasNotResults;
|
private boolean formulasNotResults;
|
||||||
|
|
||||||
@ -240,7 +241,12 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
|||||||
headerFooter.setLength(0);
|
headerFooter.setLength(0);
|
||||||
}
|
}
|
||||||
else if("row".equals(name)) {
|
else if("row".equals(name)) {
|
||||||
rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
|
String rowNumStr = attributes.getValue("r");
|
||||||
|
if(rowNumStr != null) {
|
||||||
|
rowNum = Integer.parseInt(rowNumStr) - 1;
|
||||||
|
} else {
|
||||||
|
rowNum = nextRowNum;
|
||||||
|
}
|
||||||
output.startRow(rowNum);
|
output.startRow(rowNum);
|
||||||
}
|
}
|
||||||
// c => cell
|
// c => cell
|
||||||
@ -343,7 +349,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
|||||||
|
|
||||||
case NUMBER:
|
case NUMBER:
|
||||||
String n = value.toString();
|
String n = value.toString();
|
||||||
if (this.formatString != null)
|
if (this.formatString != null && n.length() > 0)
|
||||||
thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
|
thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
|
||||||
else
|
else
|
||||||
thisStr = n;
|
thisStr = n;
|
||||||
@ -370,6 +376,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
|||||||
|
|
||||||
// Finish up the row
|
// Finish up the row
|
||||||
output.endRow(rowNum);
|
output.endRow(rowNum);
|
||||||
|
|
||||||
|
// some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well
|
||||||
|
nextRowNum = rowNum + 1;
|
||||||
} else if ("sheetData".equals(name)) {
|
} else if ("sheetData".equals(name)) {
|
||||||
// Handle any "missing" cells which had comments attached
|
// Handle any "missing" cells which had comments attached
|
||||||
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);
|
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);
|
||||||
|
@ -23,6 +23,7 @@ import java.util.regex.Pattern;
|
|||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POITextExtractor;
|
||||||
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||||
@ -155,7 +156,6 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
|
|||||||
POITextExtractor[] extractors =
|
POITextExtractor[] extractors =
|
||||||
new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
|
new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
|
||||||
for (int i = 0; i < extractors.length; i++) {
|
for (int i = 0; i < extractors.length; i++) {
|
||||||
@SuppressWarnings("resource")
|
|
||||||
POITextExtractor extractor = extractors[i];
|
POITextExtractor extractor = extractors[i];
|
||||||
|
|
||||||
String text = extractor.getText().replaceAll("[\r\t]", "");
|
String text = extractor.getText().replaceAll("[\r\t]", "");
|
||||||
@ -316,4 +316,25 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
|
|||||||
fixture.close();
|
fixture.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testFile56278_normal() throws Exception {
|
||||||
|
// first with normal Text Extractor
|
||||||
|
POIXMLTextExtractor extractor = new XSSFExcelExtractor(
|
||||||
|
XSSFTestDataSamples.openSampleWorkbook("56278.xlsx"));
|
||||||
|
try {
|
||||||
|
assertNotNull(extractor.getText());
|
||||||
|
} finally {
|
||||||
|
extractor.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testFile56278_event() throws Exception {
|
||||||
|
// then with event based one
|
||||||
|
POIXMLTextExtractor extractor = getExtractor("56278.xlsx");
|
||||||
|
try {
|
||||||
|
assertNotNull(extractor.getText());
|
||||||
|
} finally {
|
||||||
|
extractor.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,10 +22,12 @@ import java.io.IOException;
|
|||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.POIDataSamples;
|
||||||
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.hpsf.Thumbnail;
|
import org.apache.poi.hpsf.Thumbnail;
|
||||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||||
|
import org.apache.poi.hwpf.extractor.Word6Extractor;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
public final class TestHPSFPropertiesExtractor extends TestCase {
|
public final class TestHPSFPropertiesExtractor extends TestCase {
|
||||||
@ -34,45 +36,53 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
|
|||||||
public void testNormalProperties() throws Exception {
|
public void testNormalProperties() throws Exception {
|
||||||
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc"));
|
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc"));
|
||||||
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
|
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
|
||||||
ext.getText();
|
try {
|
||||||
|
ext.getText();
|
||||||
// Check each bit in turn
|
|
||||||
String sinfText = ext.getSummaryInformationText();
|
// Check each bit in turn
|
||||||
String dinfText = ext.getDocumentSummaryInformationText();
|
String sinfText = ext.getSummaryInformationText();
|
||||||
|
String dinfText = ext.getDocumentSummaryInformationText();
|
||||||
assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1);
|
|
||||||
assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1);
|
assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1);
|
||||||
assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1);
|
assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1);
|
||||||
assertTrue(dinfText.indexOf("COMPANY = sample company") > -1);
|
assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1);
|
||||||
|
assertTrue(dinfText.indexOf("COMPANY = sample company") > -1);
|
||||||
// Now overall
|
|
||||||
String text = ext.getText();
|
// Now overall
|
||||||
assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
|
String text = ext.getText();
|
||||||
assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
|
assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
|
||||||
assertTrue(text.indexOf("MANAGER = sample manager") > -1);
|
assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
|
||||||
assertTrue(text.indexOf("COMPANY = sample company") > -1);
|
assertTrue(text.indexOf("MANAGER = sample manager") > -1);
|
||||||
|
assertTrue(text.indexOf("COMPANY = sample company") > -1);
|
||||||
|
} finally {
|
||||||
|
ext.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testNormalUnicodeProperties() throws Exception {
|
public void testNormalUnicodeProperties() throws Exception {
|
||||||
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestUnicode.xls"));
|
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestUnicode.xls"));
|
||||||
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
|
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
|
||||||
ext.getText();
|
try {
|
||||||
|
ext.getText();
|
||||||
// Check each bit in turn
|
|
||||||
String sinfText = ext.getSummaryInformationText();
|
// Check each bit in turn
|
||||||
String dinfText = ext.getDocumentSummaryInformationText();
|
String sinfText = ext.getSummaryInformationText();
|
||||||
|
String dinfText = ext.getDocumentSummaryInformationText();
|
||||||
assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1);
|
|
||||||
assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1);
|
assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1);
|
||||||
assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1);
|
assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1);
|
||||||
assertTrue(dinfText.indexOf("SCALE = false") > -1);
|
assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1);
|
||||||
|
assertTrue(dinfText.indexOf("SCALE = false") > -1);
|
||||||
// Now overall
|
|
||||||
String text = ext.getText();
|
// Now overall
|
||||||
assertTrue(text.indexOf("AUTHOR = marshall") > -1);
|
String text = ext.getText();
|
||||||
assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1);
|
assertTrue(text.indexOf("AUTHOR = marshall") > -1);
|
||||||
assertTrue(text.indexOf("COMPANY = Schreiner") > -1);
|
assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1);
|
||||||
assertTrue(text.indexOf("SCALE = false") > -1);
|
assertTrue(text.indexOf("COMPANY = Schreiner") > -1);
|
||||||
|
assertTrue(text.indexOf("SCALE = false") > -1);
|
||||||
|
} finally {
|
||||||
|
ext.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCustomProperties() throws Exception {
|
public void testCustomProperties() throws Exception {
|
||||||
@ -80,18 +90,21 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
|
|||||||
_samples.openResourceAsStream("TestMickey.doc")
|
_samples.openResourceAsStream("TestMickey.doc")
|
||||||
);
|
);
|
||||||
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
|
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
|
||||||
|
try {
|
||||||
// Custom properties are part of the document info stream
|
// Custom properties are part of the document info stream
|
||||||
String dinfText = ext.getDocumentSummaryInformationText();
|
String dinfText = ext.getDocumentSummaryInformationText();
|
||||||
assertTrue(dinfText.indexOf("Client = sample client") > -1);
|
assertTrue(dinfText.indexOf("Client = sample client") > -1);
|
||||||
assertTrue(dinfText.indexOf("Division = sample division") > -1);
|
assertTrue(dinfText.indexOf("Division = sample division") > -1);
|
||||||
|
|
||||||
String text = ext.getText();
|
String text = ext.getText();
|
||||||
assertTrue(text.indexOf("Client = sample client") > -1);
|
assertTrue(text.indexOf("Client = sample client") > -1);
|
||||||
assertTrue(text.indexOf("Division = sample division") > -1);
|
assertTrue(text.indexOf("Division = sample division") > -1);
|
||||||
|
} finally {
|
||||||
|
ext.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testConstructors() {
|
public void testConstructors() throws IOException {
|
||||||
POIFSFileSystem fs;
|
POIFSFileSystem fs;
|
||||||
HSSFWorkbook wb;
|
HSSFWorkbook wb;
|
||||||
try {
|
try {
|
||||||
@ -102,9 +115,29 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
|
|||||||
}
|
}
|
||||||
ExcelExtractor excelExt = new ExcelExtractor(wb);
|
ExcelExtractor excelExt = new ExcelExtractor(wb);
|
||||||
|
|
||||||
String fsText = (new HPSFPropertiesExtractor(fs)).getText();
|
final String fsText;
|
||||||
String hwText = (new HPSFPropertiesExtractor(wb)).getText();
|
HPSFPropertiesExtractor fsExt = new HPSFPropertiesExtractor(fs);
|
||||||
String eeText = (new HPSFPropertiesExtractor(excelExt)).getText();
|
try {
|
||||||
|
fsText = fsExt.getText();
|
||||||
|
} finally {
|
||||||
|
fsExt.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
final String hwText;
|
||||||
|
HPSFPropertiesExtractor hwExt = new HPSFPropertiesExtractor(wb);
|
||||||
|
try {
|
||||||
|
hwText = hwExt.getText();
|
||||||
|
} finally {
|
||||||
|
hwExt.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
final String eeText;
|
||||||
|
HPSFPropertiesExtractor eeExt = new HPSFPropertiesExtractor(excelExt);
|
||||||
|
try {
|
||||||
|
eeText = eeExt.getText();
|
||||||
|
} finally {
|
||||||
|
eeExt.close();
|
||||||
|
}
|
||||||
|
|
||||||
assertEquals(fsText, hwText);
|
assertEquals(fsText, hwText);
|
||||||
assertEquals(fsText, eeText);
|
assertEquals(fsText, eeText);
|
||||||
@ -113,13 +146,17 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
|
|||||||
assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1);
|
assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test42726() {
|
public void test42726() throws IOException {
|
||||||
HPSFPropertiesExtractor ex = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls"));
|
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls"));
|
||||||
String txt = ex.getText();
|
try {
|
||||||
assertTrue(txt.indexOf("PID_AUTHOR") != -1);
|
String txt = ext.getText();
|
||||||
assertTrue(txt.indexOf("PID_EDITTIME") != -1);
|
assertTrue(txt.indexOf("PID_AUTHOR") != -1);
|
||||||
assertTrue(txt.indexOf("PID_REVNUMBER") != -1);
|
assertTrue(txt.indexOf("PID_EDITTIME") != -1);
|
||||||
assertTrue(txt.indexOf("PID_THUMBNAIL") != -1);
|
assertTrue(txt.indexOf("PID_REVNUMBER") != -1);
|
||||||
|
assertTrue(txt.indexOf("PID_THUMBNAIL") != -1);
|
||||||
|
} finally {
|
||||||
|
ext.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testThumbnail() throws Exception {
|
public void testThumbnail() throws Exception {
|
||||||
@ -131,4 +168,24 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
|
|||||||
assertNotNull(thumbnail.getThumbnailAsWMF());
|
assertNotNull(thumbnail.getThumbnailAsWMF());
|
||||||
wb.close();
|
wb.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testExtractorFromWord6Extractor() throws Exception {
|
||||||
|
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc"));
|
||||||
|
Word6Extractor wExt = new Word6Extractor(fs);
|
||||||
|
try {
|
||||||
|
POITextExtractor ext = wExt.getMetadataTextExtractor();
|
||||||
|
try {
|
||||||
|
// Now overall
|
||||||
|
String text = ext.getText();
|
||||||
|
assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
|
||||||
|
assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
|
||||||
|
assertTrue(text.indexOf("MANAGER = sample manager") > -1);
|
||||||
|
assertTrue(text.indexOf("COMPANY = sample company") > -1);
|
||||||
|
} finally {
|
||||||
|
ext.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
wExt.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -107,8 +107,6 @@ public final class TestHSSFEventFactory extends TestCase {
|
|||||||
POIFSFileSystem fs = new POIFSFileSystem(openSample("42844.xls"));
|
POIFSFileSystem fs = new POIFSFileSystem(openSample("42844.xls"));
|
||||||
HSSFEventFactory factory = new HSSFEventFactory();
|
HSSFEventFactory factory = new HSSFEventFactory();
|
||||||
factory.processWorkbookEvents(req, fs);
|
factory.processWorkbookEvents(req, fs);
|
||||||
|
|
||||||
assertTrue("no errors while processing the file", true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class MockHSSFListener implements HSSFListener {
|
private static class MockHSSFListener implements HSSFListener {
|
||||||
@ -125,4 +123,18 @@ public final class TestHSSFEventFactory extends TestCase {
|
|||||||
records.add(record);
|
records.add(record);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testWithDifferentWorkbookName() throws Exception {
|
||||||
|
HSSFRequest req = new HSSFRequest();
|
||||||
|
MockHSSFListener mockListen = new MockHSSFListener();
|
||||||
|
req.addListenerForAllRecords(mockListen);
|
||||||
|
|
||||||
|
POIFSFileSystem fs = new POIFSFileSystem(openSample("BOOK_in_capitals.xls"));
|
||||||
|
HSSFEventFactory factory = new HSSFEventFactory();
|
||||||
|
factory.processWorkbookEvents(req, fs);
|
||||||
|
|
||||||
|
fs = new POIFSFileSystem(openSample("WORKBOOK_in_capitals.xls"));
|
||||||
|
factory = new HSSFEventFactory();
|
||||||
|
factory.processWorkbookEvents(req, fs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,15 +46,18 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testSimple() {
|
public void testSimple() throws IOException {
|
||||||
|
|
||||||
ExcelExtractor extractor = createExtractor("Simple.xls");
|
ExcelExtractor extractor = createExtractor("Simple.xls");
|
||||||
|
|
||||||
assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
|
try {
|
||||||
|
assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
|
||||||
// Now turn off sheet names
|
|
||||||
extractor.setIncludeSheetNames(false);
|
// Now turn off sheet names
|
||||||
assertEquals("replaceMe\n", extractor.getText());
|
extractor.setIncludeSheetNames(false);
|
||||||
|
assertEquals("replaceMe\n", extractor.getText());
|
||||||
|
} finally {
|
||||||
|
extractor.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testNumericFormula() {
|
public void testNumericFormula() {
|
||||||
@ -126,45 +129,47 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
|
|
||||||
|
|
||||||
public void testEventExtractor() throws Exception {
|
public void testEventExtractor() throws Exception {
|
||||||
EventBasedExcelExtractor extractor;
|
|
||||||
|
|
||||||
// First up, a simple file with string
|
// First up, a simple file with string
|
||||||
// based formulas in it
|
// based formulas in it
|
||||||
extractor = new EventBasedExcelExtractor(
|
EventBasedExcelExtractor extractor = new EventBasedExcelExtractor(
|
||||||
new POIFSFileSystem(
|
new POIFSFileSystem(
|
||||||
HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
|
HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
extractor.setIncludeSheetNames(true);
|
try {
|
||||||
|
extractor.setIncludeSheetNames(true);
|
||||||
String text = extractor.getText();
|
|
||||||
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
|
String text = extractor.getText();
|
||||||
|
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
|
||||||
extractor.setIncludeSheetNames(false);
|
|
||||||
extractor.setFormulasNotResults(true);
|
extractor.setIncludeSheetNames(false);
|
||||||
|
extractor.setFormulasNotResults(true);
|
||||||
text = extractor.getText();
|
|
||||||
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
|
text = extractor.getText();
|
||||||
|
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
|
||||||
|
|
||||||
// Now, a slightly longer file with numeric formulas
|
|
||||||
extractor = new EventBasedExcelExtractor(
|
// Now, a slightly longer file with numeric formulas
|
||||||
new POIFSFileSystem(
|
extractor = new EventBasedExcelExtractor(
|
||||||
HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
|
new POIFSFileSystem(
|
||||||
)
|
HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
|
||||||
);
|
)
|
||||||
extractor.setIncludeSheetNames(false);
|
);
|
||||||
extractor.setFormulasNotResults(true);
|
extractor.setIncludeSheetNames(false);
|
||||||
|
extractor.setFormulasNotResults(true);
|
||||||
text = extractor.getText();
|
|
||||||
assertEquals(
|
text = extractor.getText();
|
||||||
"1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
|
assertEquals(
|
||||||
"2000\t2\n" +
|
"1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
|
||||||
"3000\t3\n" +
|
"2000\t2\n" +
|
||||||
"4000\t4\n" +
|
"3000\t3\n" +
|
||||||
"5000\t5\n",
|
"4000\t4\n" +
|
||||||
text
|
"5000\t5\n",
|
||||||
);
|
text
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
extractor.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testWithComments() {
|
public void testWithComments() {
|
||||||
@ -272,15 +277,22 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
|
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
|
||||||
|
|
||||||
ExcelExtractor exA = new ExcelExtractor(wbA);
|
ExcelExtractor exA = new ExcelExtractor(wbA);
|
||||||
ExcelExtractor exB = new ExcelExtractor(wbB);
|
try {
|
||||||
|
ExcelExtractor exB = new ExcelExtractor(wbB);
|
||||||
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
|
try {
|
||||||
exA.getText());
|
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
|
||||||
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
|
exA.getText());
|
||||||
|
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
|
||||||
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
|
|
||||||
exB.getText());
|
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
|
||||||
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
|
exB.getText());
|
||||||
|
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
|
||||||
|
} finally {
|
||||||
|
exB.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
exA.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -299,21 +311,32 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
|
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
|
||||||
|
|
||||||
ExcelExtractor exA = new ExcelExtractor(wbA);
|
ExcelExtractor exA = new ExcelExtractor(wbA);
|
||||||
ExcelExtractor exB = new ExcelExtractor(wbB);
|
try {
|
||||||
|
ExcelExtractor exB = new ExcelExtractor(wbB);
|
||||||
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
|
try {
|
||||||
exA.getText());
|
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
|
||||||
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
|
exA.getText());
|
||||||
|
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
|
||||||
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
|
|
||||||
exB.getText());
|
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
|
||||||
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
|
exB.getText());
|
||||||
|
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
|
||||||
// And the base file too
|
|
||||||
ExcelExtractor ex = new ExcelExtractor(fs);
|
// And the base file too
|
||||||
assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
|
ExcelExtractor ex = new ExcelExtractor(fs);
|
||||||
ex.getText());
|
try {
|
||||||
assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());
|
assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
|
||||||
|
ex.getText());
|
||||||
|
assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());
|
||||||
|
} finally {
|
||||||
|
ex.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
exB.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
exA.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user