* Verify more text-extraction features as part of the integration tests, and fix some NullPointerExceptions that surfaced because event-based extraction does not have a Document available

* Also handle an XLSX file which does not have row numbers in the sheet XML. Excel can read such files, so it makes sense to allow XSSFSheetXMLHandler to read them as well
* Remove some Eclipse warnings in test-code

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1662691 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2015-02-27 14:58:41 +00:00
parent 80d0d3b5bd
commit f043c44017
12 changed files with 354 additions and 134 deletions

View File

@ -16,15 +16,23 @@
==================================================================== */ ==================================================================== */
package org.apache.poi.stress; package org.apache.poi.stress;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import org.apache.poi.POITextExtractor; import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.xmlbeans.XmlException;
public abstract class AbstractFileHandler implements FileHandler { public abstract class AbstractFileHandler implements FileHandler {
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>(); public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>();
@ -48,6 +56,22 @@ public abstract class AbstractFileHandler implements FileHandler {
} }
public void handleExtracting(File file) throws Exception { public void handleExtracting(File file) throws Exception {
boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
try {
ExtractorFactory.setThreadPrefersEventExtractors(true);
handleExtractingInternal(file);
ExtractorFactory.setThreadPrefersEventExtractors(false);
handleExtractingInternal(file);
} finally {
ExtractorFactory.setThreadPrefersEventExtractors(before);
}
}
private void handleExtractingInternal(File file) throws Exception {
long length = file.length();
long modified = file.lastModified();
POITextExtractor extractor = ExtractorFactory.createExtractor(file); POITextExtractor extractor = ExtractorFactory.createExtractor(file);
try { try {
assertNotNull(extractor); assertNotNull(extractor);
@ -60,6 +84,11 @@ public abstract class AbstractFileHandler implements FileHandler {
assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!", assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
EXPECTED_EXTRACTOR_FAILURES.contains(file)); EXPECTED_EXTRACTOR_FAILURES.contains(file));
assertEquals("File should not be modified by extractor", length, file.length());
assertEquals("File should not be modified by extractor", modified, file.lastModified());
handleExtractingAsStream(file);
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {
if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) { if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
throw new Exception("While handling " + file, e); throw new Exception("While handling " + file, e);
@ -68,4 +97,22 @@ public abstract class AbstractFileHandler implements FileHandler {
extractor.close(); extractor.close();
} }
} }
/**
 * Repeats the extraction check using an {@link InputStream} opened on the file
 * instead of the {@link File} itself.
 *
 * The extractor is checked for null before entering the try/finally that
 * closes it; otherwise a null extractor would trigger a NullPointerException
 * in the finally-block, which would replace the more helpful assertion failure.
 *
 * @param file the sample file to open as a stream and extract text from
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if reading or closing the stream/extractor fails
 * @throws InvalidFormatException declared by the extractor factory
 * @throws OpenXML4JException declared by the extractor factory
 * @throws XmlException declared by the extractor factory
 */
private void handleExtractingAsStream(File file) throws FileNotFoundException,
        IOException, InvalidFormatException, OpenXML4JException,
        XmlException {
    InputStream stream = new FileInputStream(file);
    try {
        POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream);
        // check before the try so that close() is never invoked on null
        assertNotNull(streamExtractor);
        try {
            assertNotNull(streamExtractor.getText());
        } finally {
            streamExtractor.close();
        }
    } finally {
        stream.close();
    }
}
} }

View File

@ -18,6 +18,7 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
@ -43,4 +44,10 @@ public class HPSFFileHandler extends AbstractFileHandler {
stream.close(); stream.close();
} }
} }
/**
 * Local convenience test: runs the extraction checks against a single sample
 * file so they can be exercised without executing the full TestAllFiles.
 */
@Test
public void testExtractor() throws Exception {
    final File sample = new File("test-data/hpsf/TestBug44375.xls");
    handleExtracting(sample);
}
} }

View File

@ -17,6 +17,7 @@
package org.apache.poi.stress; package org.apache.poi.stress;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
@ -71,4 +72,10 @@ public class XSSFFileHandler extends SpreadsheetHandler {
stream.close(); stream.close();
} }
} }
/**
 * Local convenience test: runs the extraction checks against a single sample
 * file so they can be exercised without executing the full TestAllFiles.
 */
@Test
public void testExtractor() throws Exception {
    final File sample = new File("test-data/spreadsheet/56278.xlsx");
    handleExtracting(sample);
}
} }

View File

@ -57,6 +57,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor {
} }
public String getDocumentSummaryInformationText() { public String getDocumentSummaryInformationText() {
if(document == null) { // event based extractor does not have a document
return "";
}
DocumentSummaryInformation dsi = document.getDocumentSummaryInformation(); DocumentSummaryInformation dsi = document.getDocumentSummaryInformation();
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
@ -78,6 +82,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor {
return text.toString(); return text.toString();
} }
public String getSummaryInformationText() { public String getSummaryInformationText() {
if(document == null) { // event based extractor does not have a document
return "";
}
SummaryInformation si = document.getSummaryInformation(); SummaryInformation si = document.getSummaryInformation();
// Just normal properties // Just normal properties

View File

@ -19,6 +19,7 @@ package org.apache.poi.hssf.eventusermodel;
import java.io.InputStream; import java.io.InputStream;
import java.io.IOException; import java.io.IOException;
import java.util.Set;
import org.apache.poi.hssf.eventusermodel.HSSFUserException; import org.apache.poi.hssf.eventusermodel.HSSFUserException;
import org.apache.poi.hssf.record.*; import org.apache.poi.hssf.record.*;
@ -56,11 +57,24 @@ public class HSSFEventFactory {
* @param req an Instance of HSSFRequest which has your registered listeners * @param req an Instance of HSSFRequest which has your registered listeners
* @param dir a DirectoryNode containing your workbook * @param dir a DirectoryNode containing your workbook
*/ */
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
InputStream in = dir.createDocumentInputStream("Workbook"); // some old documents have "WORKBOOK" or "BOOK"
final String name;
Set<String> entryNames = dir.getEntryNames();
if (entryNames.contains("Workbook")) {
name = "Workbook";
} else if (entryNames.contains("WORKBOOK")) {
name = "WORKBOOK";
} else if (entryNames.contains("BOOK")) {
name = "BOOK";
} else {
name = "Workbook";
}
processEvents(req, in); InputStream in = dir.createDocumentInputStream(name);
}
processEvents(req, in);
}
/** /**
* Processes a file into essentially record events. * Processes a file into essentially record events.

View File

@ -67,9 +67,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
* Returns the core document properties, eg author * Returns the core document properties, eg author
*/ */
public String getCorePropertiesText() { public String getCorePropertiesText() {
POIXMLDocument document = getDocument();
if(document == null) { // event based extractor does not have a document
return "";
}
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
PackagePropertiesPart props = PackagePropertiesPart props =
getDocument().getProperties().getCoreProperties().getUnderlyingProperties(); document.getProperties().getCoreProperties().getUnderlyingProperties();
appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
@ -99,9 +104,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
* application * application
*/ */
public String getExtendedPropertiesText() { public String getExtendedPropertiesText() {
POIXMLDocument document = getDocument();
if(document == null) { // event based extractor does not have a document
return "";
}
StringBuffer text = new StringBuffer(); StringBuffer text = new StringBuffer();
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
props = getDocument().getProperties().getExtendedProperties().getUnderlyingProperties(); props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
appendIfPresent(text, "Application", props.getApplication()); appendIfPresent(text, "Application", props.getApplication());
appendIfPresent(text, "AppVersion", props.getAppVersion()); appendIfPresent(text, "AppVersion", props.getAppVersion());
@ -127,9 +137,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
*/ */
@SuppressWarnings("deprecation") @SuppressWarnings("deprecation")
public String getCustomPropertiesText() { public String getCustomPropertiesText() {
POIXMLDocument document = getDocument();
if(document == null) { // event based extractor does not have a document
return "";
}
StringBuilder text = new StringBuilder(); StringBuilder text = new StringBuilder();
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
props = getDocument().getProperties().getCustomProperties().getUnderlyingProperties(); props = document.getProperties().getCustomProperties().getUnderlyingProperties();
for(CTProperty property : props.getPropertyArray()) { for(CTProperty property : props.getPropertyArray()) {
String val = "(not implemented!)"; String val = "(not implemented!)";

View File

@ -265,10 +265,10 @@ public class ExtractorFactory {
/** /**
* Returns an array of text extractors, one for each of * Returns an array of text extractors, one for each of
* the embeded documents in the file (if there are any). * the embedded documents in the file (if there are any).
* If there are no embeded documents, you'll get back an * If there are no embedded documents, you'll get back an
* empty array. Otherwise, you'll get one open * empty array. Otherwise, you'll get one open
* {@link POITextExtractor} for each embeded file. * {@link POITextExtractor} for each embedded file.
*/ */
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
// All the embedded directories we spotted // All the embedded directories we spotted

View File

@ -96,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
private String formatString; private String formatString;
private final DataFormatter formatter; private final DataFormatter formatter;
private int rowNum; private int rowNum;
private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well
private String cellRef; private String cellRef;
private boolean formulasNotResults; private boolean formulasNotResults;
@ -240,7 +241,12 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
headerFooter.setLength(0); headerFooter.setLength(0);
} }
else if("row".equals(name)) { else if("row".equals(name)) {
rowNum = Integer.parseInt(attributes.getValue("r")) - 1; String rowNumStr = attributes.getValue("r");
if(rowNumStr != null) {
rowNum = Integer.parseInt(rowNumStr) - 1;
} else {
rowNum = nextRowNum;
}
output.startRow(rowNum); output.startRow(rowNum);
} }
// c => cell // c => cell
@ -343,7 +349,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
case NUMBER: case NUMBER:
String n = value.toString(); String n = value.toString();
if (this.formatString != null) if (this.formatString != null && n.length() > 0)
thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
else else
thisStr = n; thisStr = n;
@ -370,6 +376,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
// Finish up the row // Finish up the row
output.endRow(rowNum); output.endRow(rowNum);
// some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well
nextRowNum = rowNum + 1;
} else if ("sheetData".equals(name)) { } else if ("sheetData".equals(name)) {
// Handle any "missing" cells which had comments attached // Handle any "missing" cells which had comments attached
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);

View File

@ -23,6 +23,7 @@ import java.util.regex.Pattern;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.POITextExtractor; import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.xssf.XSSFTestDataSamples; import org.apache.poi.xssf.XSSFTestDataSamples;
@ -155,7 +156,6 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
POITextExtractor[] extractors = POITextExtractor[] extractors =
new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
for (int i = 0; i < extractors.length; i++) { for (int i = 0; i < extractors.length; i++) {
@SuppressWarnings("resource")
POITextExtractor extractor = extractors[i]; POITextExtractor extractor = extractors[i];
String text = extractor.getText().replaceAll("[\r\t]", ""); String text = extractor.getText().replaceAll("[\r\t]", "");
@ -316,4 +316,25 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
fixture.close(); fixture.close();
} }
} }
/**
 * Extracts text from sample 56278.xlsx using the regular (non-event)
 * XSSFExcelExtractor and checks that a non-null result comes back.
 */
public void testFile56278_normal() throws Exception {
    // first with normal Text Extractor
    final POIXMLTextExtractor regularExtractor =
            new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook("56278.xlsx"));
    try {
        final String extracted = regularExtractor.getText();
        assertNotNull(extracted);
    } finally {
        regularExtractor.close();
    }
}
/**
 * Extracts text from sample 56278.xlsx using the extractor supplied by this
 * test's getExtractor helper and checks that a non-null result comes back.
 */
public void testFile56278_event() throws Exception {
    // then with event based one
    final POIXMLTextExtractor eventExtractor = getExtractor("56278.xlsx");
    try {
        final String extracted = eventExtractor.getText();
        assertNotNull(extracted);
    } finally {
        eventExtractor.close();
    }
}
} }

View File

@ -22,10 +22,12 @@ import java.io.IOException;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.POITextExtractor;
import org.apache.poi.hpsf.Thumbnail; import org.apache.poi.hpsf.Thumbnail;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public final class TestHPSFPropertiesExtractor extends TestCase { public final class TestHPSFPropertiesExtractor extends TestCase {
@ -34,45 +36,53 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
public void testNormalProperties() throws Exception { public void testNormalProperties() throws Exception {
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc")); POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc"));
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
ext.getText(); try {
ext.getText();
// Check each bit in turn
String sinfText = ext.getSummaryInformationText(); // Check each bit in turn
String dinfText = ext.getDocumentSummaryInformationText(); String sinfText = ext.getSummaryInformationText();
String dinfText = ext.getDocumentSummaryInformationText();
assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1);
assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1); assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1);
assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1); assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1);
assertTrue(dinfText.indexOf("COMPANY = sample company") > -1); assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1);
assertTrue(dinfText.indexOf("COMPANY = sample company") > -1);
// Now overall
String text = ext.getText(); // Now overall
assertTrue(text.indexOf("TEMPLATE = Normal") > -1); String text = ext.getText();
assertTrue(text.indexOf("SUBJECT = sample subject") > -1); assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
assertTrue(text.indexOf("MANAGER = sample manager") > -1); assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
assertTrue(text.indexOf("COMPANY = sample company") > -1); assertTrue(text.indexOf("MANAGER = sample manager") > -1);
assertTrue(text.indexOf("COMPANY = sample company") > -1);
} finally {
ext.close();
}
} }
public void testNormalUnicodeProperties() throws Exception { public void testNormalUnicodeProperties() throws Exception {
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestUnicode.xls")); POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestUnicode.xls"));
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
ext.getText(); try {
ext.getText();
// Check each bit in turn
String sinfText = ext.getSummaryInformationText(); // Check each bit in turn
String dinfText = ext.getDocumentSummaryInformationText(); String sinfText = ext.getSummaryInformationText();
String dinfText = ext.getDocumentSummaryInformationText();
assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1);
assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1); assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1);
assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1); assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1);
assertTrue(dinfText.indexOf("SCALE = false") > -1); assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1);
assertTrue(dinfText.indexOf("SCALE = false") > -1);
// Now overall
String text = ext.getText(); // Now overall
assertTrue(text.indexOf("AUTHOR = marshall") > -1); String text = ext.getText();
assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1); assertTrue(text.indexOf("AUTHOR = marshall") > -1);
assertTrue(text.indexOf("COMPANY = Schreiner") > -1); assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1);
assertTrue(text.indexOf("SCALE = false") > -1); assertTrue(text.indexOf("COMPANY = Schreiner") > -1);
assertTrue(text.indexOf("SCALE = false") > -1);
} finally {
ext.close();
}
} }
public void testCustomProperties() throws Exception { public void testCustomProperties() throws Exception {
@ -80,18 +90,21 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
_samples.openResourceAsStream("TestMickey.doc") _samples.openResourceAsStream("TestMickey.doc")
); );
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
try {
// Custom properties are part of the document info stream // Custom properties are part of the document info stream
String dinfText = ext.getDocumentSummaryInformationText(); String dinfText = ext.getDocumentSummaryInformationText();
assertTrue(dinfText.indexOf("Client = sample client") > -1); assertTrue(dinfText.indexOf("Client = sample client") > -1);
assertTrue(dinfText.indexOf("Division = sample division") > -1); assertTrue(dinfText.indexOf("Division = sample division") > -1);
String text = ext.getText(); String text = ext.getText();
assertTrue(text.indexOf("Client = sample client") > -1); assertTrue(text.indexOf("Client = sample client") > -1);
assertTrue(text.indexOf("Division = sample division") > -1); assertTrue(text.indexOf("Division = sample division") > -1);
} finally {
ext.close();
}
} }
public void testConstructors() { public void testConstructors() throws IOException {
POIFSFileSystem fs; POIFSFileSystem fs;
HSSFWorkbook wb; HSSFWorkbook wb;
try { try {
@ -102,9 +115,29 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
} }
ExcelExtractor excelExt = new ExcelExtractor(wb); ExcelExtractor excelExt = new ExcelExtractor(wb);
String fsText = (new HPSFPropertiesExtractor(fs)).getText(); final String fsText;
String hwText = (new HPSFPropertiesExtractor(wb)).getText(); HPSFPropertiesExtractor fsExt = new HPSFPropertiesExtractor(fs);
String eeText = (new HPSFPropertiesExtractor(excelExt)).getText(); try {
fsText = fsExt.getText();
} finally {
fsExt.close();
}
final String hwText;
HPSFPropertiesExtractor hwExt = new HPSFPropertiesExtractor(wb);
try {
hwText = hwExt.getText();
} finally {
hwExt.close();
}
final String eeText;
HPSFPropertiesExtractor eeExt = new HPSFPropertiesExtractor(excelExt);
try {
eeText = eeExt.getText();
} finally {
eeExt.close();
}
assertEquals(fsText, hwText); assertEquals(fsText, hwText);
assertEquals(fsText, eeText); assertEquals(fsText, eeText);
@ -113,13 +146,17 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1); assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1);
} }
public void test42726() { public void test42726() throws IOException {
HPSFPropertiesExtractor ex = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls")); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls"));
String txt = ex.getText(); try {
assertTrue(txt.indexOf("PID_AUTHOR") != -1); String txt = ext.getText();
assertTrue(txt.indexOf("PID_EDITTIME") != -1); assertTrue(txt.indexOf("PID_AUTHOR") != -1);
assertTrue(txt.indexOf("PID_REVNUMBER") != -1); assertTrue(txt.indexOf("PID_EDITTIME") != -1);
assertTrue(txt.indexOf("PID_THUMBNAIL") != -1); assertTrue(txt.indexOf("PID_REVNUMBER") != -1);
assertTrue(txt.indexOf("PID_THUMBNAIL") != -1);
} finally {
ext.close();
}
} }
public void testThumbnail() throws Exception { public void testThumbnail() throws Exception {
@ -131,4 +168,24 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
assertNotNull(thumbnail.getThumbnailAsWMF()); assertNotNull(thumbnail.getThumbnailAsWMF());
wb.close(); wb.close();
} }
/**
 * Obtains the metadata text extractor from a Word6Extractor opened on
 * TestMickey.doc and checks that its text contains the expected property lines.
 * Both extractors are closed again, innermost first.
 */
public void testExtractorFromWord6Extractor() throws Exception {
    final String[] expectedEntries = {
        "TEMPLATE = Normal",
        "SUBJECT = sample subject",
        "MANAGER = sample manager",
        "COMPANY = sample company",
    };

    POIFSFileSystem poifs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc"));
    Word6Extractor wordExtractor = new Word6Extractor(poifs);
    try {
        POITextExtractor metadataExtractor = wordExtractor.getMetadataTextExtractor();
        try {
            // Now overall
            String allText = metadataExtractor.getText();
            for (String expected : expectedEntries) {
                assertTrue(allText.contains(expected));
            }
        } finally {
            metadataExtractor.close();
        }
    } finally {
        wordExtractor.close();
    }
}
} }

View File

@ -107,8 +107,6 @@ public final class TestHSSFEventFactory extends TestCase {
POIFSFileSystem fs = new POIFSFileSystem(openSample("42844.xls")); POIFSFileSystem fs = new POIFSFileSystem(openSample("42844.xls"));
HSSFEventFactory factory = new HSSFEventFactory(); HSSFEventFactory factory = new HSSFEventFactory();
factory.processWorkbookEvents(req, fs); factory.processWorkbookEvents(req, fs);
assertTrue("no errors while processing the file", true);
} }
private static class MockHSSFListener implements HSSFListener { private static class MockHSSFListener implements HSSFListener {
@ -125,4 +123,18 @@ public final class TestHSSFEventFactory extends TestCase {
records.add(record); records.add(record);
} }
} }
/**
 * Runs the event factory over two sample files, BOOK_in_capitals.xls and
 * WORKBOOK_in_capitals.xls, using one shared request/listener. The test has no
 * explicit assertions; it passes when processing completes without throwing.
 */
public void testWithDifferentWorkbookName() throws Exception {
    HSSFRequest request = new HSSFRequest();
    MockHSSFListener listener = new MockHSSFListener();
    request.addListenerForAllRecords(listener);

    final String[] sampleNames = { "BOOK_in_capitals.xls", "WORKBOOK_in_capitals.xls" };
    for (String sampleName : sampleNames) {
        // a fresh filesystem and factory per file, as in a standalone run
        POIFSFileSystem poifs = new POIFSFileSystem(openSample(sampleName));
        HSSFEventFactory eventFactory = new HSSFEventFactory();
        eventFactory.processWorkbookEvents(request, poifs);
    }
}
} }

View File

@ -46,15 +46,18 @@ public final class TestExcelExtractor extends TestCase {
} }
public void testSimple() { public void testSimple() throws IOException {
ExcelExtractor extractor = createExtractor("Simple.xls"); ExcelExtractor extractor = createExtractor("Simple.xls");
assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText()); try {
assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
// Now turn off sheet names
extractor.setIncludeSheetNames(false); // Now turn off sheet names
assertEquals("replaceMe\n", extractor.getText()); extractor.setIncludeSheetNames(false);
assertEquals("replaceMe\n", extractor.getText());
} finally {
extractor.close();
}
} }
public void testNumericFormula() { public void testNumericFormula() {
@ -126,45 +129,47 @@ public final class TestExcelExtractor extends TestCase {
public void testEventExtractor() throws Exception { public void testEventExtractor() throws Exception {
EventBasedExcelExtractor extractor;
// First up, a simple file with string // First up, a simple file with string
// based formulas in it // based formulas in it
extractor = new EventBasedExcelExtractor( EventBasedExcelExtractor extractor = new EventBasedExcelExtractor(
new POIFSFileSystem( new POIFSFileSystem(
HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls") HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
) )
); );
extractor.setIncludeSheetNames(true); try {
extractor.setIncludeSheetNames(true);
String text = extractor.getText();
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); String text = extractor.getText();
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
extractor.setIncludeSheetNames(false);
extractor.setFormulasNotResults(true); extractor.setIncludeSheetNames(false);
extractor.setFormulasNotResults(true);
text = extractor.getText();
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); text = extractor.getText();
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
// Now, a slightly longer file with numeric formulas
extractor = new EventBasedExcelExtractor( // Now, a slightly longer file with numeric formulas
new POIFSFileSystem( extractor = new EventBasedExcelExtractor(
HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") new POIFSFileSystem(
) HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
); )
extractor.setIncludeSheetNames(false); );
extractor.setFormulasNotResults(true); extractor.setIncludeSheetNames(false);
extractor.setFormulasNotResults(true);
text = extractor.getText();
assertEquals( text = extractor.getText();
"1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + assertEquals(
"2000\t2\n" + "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
"3000\t3\n" + "2000\t2\n" +
"4000\t4\n" + "3000\t3\n" +
"5000\t5\n", "4000\t4\n" +
text "5000\t5\n",
); text
);
} finally {
extractor.close();
}
} }
public void testWithComments() { public void testWithComments() {
@ -272,15 +277,22 @@ public final class TestExcelExtractor extends TestCase {
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
ExcelExtractor exA = new ExcelExtractor(wbA); ExcelExtractor exA = new ExcelExtractor(wbA);
ExcelExtractor exB = new ExcelExtractor(wbB); try {
ExcelExtractor exB = new ExcelExtractor(wbB);
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", try {
exA.getText()); assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); exA.getText());
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
exB.getText()); assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); exB.getText());
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
} finally {
exB.close();
}
} finally {
exA.close();
}
} }
/** /**
@ -299,21 +311,32 @@ public final class TestExcelExtractor extends TestCase {
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
ExcelExtractor exA = new ExcelExtractor(wbA); ExcelExtractor exA = new ExcelExtractor(wbA);
ExcelExtractor exB = new ExcelExtractor(wbB); try {
ExcelExtractor exB = new ExcelExtractor(wbB);
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", try {
exA.getText()); assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); exA.getText());
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
exB.getText()); assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); exB.getText());
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
// And the base file too
ExcelExtractor ex = new ExcelExtractor(fs); // And the base file too
assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", ExcelExtractor ex = new ExcelExtractor(fs);
ex.getText()); try {
assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
ex.getText());
assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());
} finally {
ex.close();
}
} finally {
exB.close();
}
} finally {
exA.close();
}
} }
/** /**