From f043c4401717854713599806e64a5bcbb306f718 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 27 Feb 2015 14:58:41 +0000 Subject: [PATCH] * Verify some more Text-Extraction features as part of integration tests, fix some NullPointerExceptions that showed up now because the event-based extraction does not have a Document available * Also handle a XLSX which does not have row-numbers in the sheet-xml. Excel can read it so it makes sense to also allow to read it in the XSSFSheetXMLHandler * Remove some Eclipse warnings in test-code git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1662691 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/stress/AbstractFileHandler.java | 47 +++++ .../apache/poi/stress/HPSFFileHandler.java | 7 + .../apache/poi/stress/XSSFFileHandler.java | 7 + .../extractor/HPSFPropertiesExtractor.java | 8 + .../hssf/eventusermodel/HSSFEventFactory.java | 22 ++- .../poi/POIXMLPropertiesTextExtractor.java | 23 ++- .../poi/extractor/ExtractorFactory.java | 6 +- .../eventusermodel/XSSFSheetXMLHandler.java | 13 +- .../TestXSSFEventBasedExcelExtractor.java | 23 ++- .../TestHPSFPropertiesExtractor.java | 165 ++++++++++++------ .../eventusermodel/TestHSSFEventFactory.java | 16 +- .../hssf/extractor/TestExcelExtractor.java | 151 +++++++++------- 12 files changed, 354 insertions(+), 134 deletions(-) diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index d7e303e4a..8a27e6d0e 100644 --- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -16,15 +16,23 @@ ==================================================================== */ package org.apache.poi.stress; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; import java.util.HashSet; import java.util.Set; import org.apache.poi.POITextExtractor; import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.xmlbeans.XmlException; public abstract class AbstractFileHandler implements FileHandler { public static final Set EXPECTED_EXTRACTOR_FAILURES = new HashSet(); @@ -48,6 +56,22 @@ public abstract class AbstractFileHandler implements FileHandler { } public void handleExtracting(File file) throws Exception { + boolean before = ExtractorFactory.getThreadPrefersEventExtractors(); + try { + ExtractorFactory.setThreadPrefersEventExtractors(true); + handleExtractingInternal(file); + + ExtractorFactory.setThreadPrefersEventExtractors(false); + handleExtractingInternal(file); + } finally { + ExtractorFactory.setThreadPrefersEventExtractors(before); + } + } + + private void handleExtractingInternal(File file) throws Exception { + long length = file.length(); + long modified = file.lastModified(); + POITextExtractor extractor = ExtractorFactory.createExtractor(file); try { assertNotNull(extractor); @@ -60,6 +84,11 @@ public abstract class AbstractFileHandler implements FileHandler { assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!", EXPECTED_EXTRACTOR_FAILURES.contains(file)); + + assertEquals("File should not be modified by extractor", length, file.length()); + assertEquals("File should not be modified by extractor", modified, file.lastModified()); + + handleExtractingAsStream(file); } catch (IllegalArgumentException e) { if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) { throw new Exception("While handling " + file, e); @@ -68,4 +97,22 @@ public abstract class AbstractFileHandler implements FileHandler { extractor.close(); } } + + private void handleExtractingAsStream(File file) throws FileNotFoundException, + IOException, InvalidFormatException, OpenXML4JException, + XmlException { + InputStream stream = new FileInputStream(file); + try { + POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream); + try { + assertNotNull(streamExtractor); + + assertNotNull(streamExtractor.getText()); + } finally { + streamExtractor.close(); + } + } finally { + stream.close(); + } + } } diff --git a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java index 477ee859c..6a53b2e00 100644 --- a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java @@ -18,6 +18,7 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -43,4 +44,10 @@ public class HPSFFileHandler extends AbstractFileHandler { stream.close(); } } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/hpsf/TestBug44375.xls")); + } } diff --git a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java index 54a386ea0..a268ed465 100644 --- a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java @@ -17,6 +17,7 @@ package org.apache.poi.stress; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -71,4 +72,10 @@ public class XSSFFileHandler extends SpreadsheetHandler { stream.close(); } } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/spreadsheet/56278.xlsx")); + } } \ No newline at end of file diff --git a/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java index ce5301ac6..1a0db0389 100644 --- a/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java +++ b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java @@ -57,6 +57,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor { } public String getDocumentSummaryInformationText() { + if(document == null) { // event based extractor does not have a document + return ""; + } + DocumentSummaryInformation dsi = document.getDocumentSummaryInformation(); StringBuffer text = new StringBuffer(); @@ -78,6 +82,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor { return text.toString(); } public String getSummaryInformationText() { + if(document == null) { // event based extractor does not have a document + return ""; + } + SummaryInformation si = document.getSummaryInformation(); // Just normal properties diff --git a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java index 45ab8d813..4d0b894e0 100644 --- a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java +++ b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java @@ -19,6 +19,7 @@ package org.apache.poi.hssf.eventusermodel; import java.io.InputStream; import java.io.IOException; +import java.util.Set; import org.apache.poi.hssf.eventusermodel.HSSFUserException; import org.apache.poi.hssf.record.*; @@ -56,11 +57,24 @@ public class HSSFEventFactory { * @param req an Instance of HSSFRequest which has your registered listeners * @param dir a DirectoryNode containing your workbook */ - public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { - InputStream in = dir.createDocumentInputStream("Workbook"); + public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { + // some old documents have "WORKBOOK" or "BOOK" + final String name; + Set entryNames = dir.getEntryNames(); + if (entryNames.contains("Workbook")) { + name = "Workbook"; + } else if (entryNames.contains("WORKBOOK")) { + name = "WORKBOOK"; + } else if (entryNames.contains("BOOK")) { + name = "BOOK"; + } else { + name = "Workbook"; + } - processEvents(req, in); - } + InputStream in = dir.createDocumentInputStream(name); + + processEvents(req, in); + } /** * Processes a file into essentially record events. diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java index ce576439f..8a35a34e4 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java +++ b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java @@ -67,9 +67,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { * Returns the core document properties, eg author */ public String getCorePropertiesText() { + POIXMLDocument document = getDocument(); + if(document == null) { // event based extractor does not have a document + return ""; + } + StringBuffer text = new StringBuffer(); - PackagePropertiesPart props = - getDocument().getProperties().getCoreProperties().getUnderlyingProperties(); + PackagePropertiesPart props = + document.getProperties().getCoreProperties().getUnderlyingProperties(); appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); @@ -99,9 +104,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { * application */ public String getExtendedPropertiesText() { + POIXMLDocument document = getDocument(); + if(document == null) { // event based extractor does not have a document + return ""; + } + StringBuffer text = new StringBuffer(); org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties - props = getDocument().getProperties().getExtendedProperties().getUnderlyingProperties(); + props = document.getProperties().getExtendedProperties().getUnderlyingProperties(); appendIfPresent(text, "Application", props.getApplication()); appendIfPresent(text, "AppVersion", props.getAppVersion()); @@ -127,9 +137,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { */ @SuppressWarnings("deprecation") public String getCustomPropertiesText() { + POIXMLDocument document = getDocument(); + if(document == null) { // event based extractor does not have a document + return ""; + } + StringBuilder text = new StringBuilder(); org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties - props = getDocument().getProperties().getCustomProperties().getUnderlyingProperties(); + props = document.getProperties().getCustomProperties().getUnderlyingProperties(); for(CTProperty property : props.getPropertyArray()) { String val = "(not implemented!)"; diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index a0b6b5db1..60a0f5181 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -265,10 +265,10 @@ public class ExtractorFactory { /** * Returns an array of text extractors, one for each of - * the embeded documents in the file (if there are any). - * If there are no embeded documents, you'll get back an + * the embedded documents in the file (if there are any). + * If there are no embedded documents, you'll get back an * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embeded file. + * {@link POITextExtractor} for each embedded file. */ public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { // All the embded directories we spotted diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java index 767742687..227441859 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java @@ -96,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { private String formatString; private final DataFormatter formatter; private int rowNum; + private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well private String cellRef; private boolean formulasNotResults; @@ -240,7 +241,12 @@ public class XSSFSheetXMLHandler extends DefaultHandler { headerFooter.setLength(0); } else if("row".equals(name)) { - rowNum = Integer.parseInt(attributes.getValue("r")) - 1; + String rowNumStr = attributes.getValue("r"); + if(rowNumStr != null) { + rowNum = Integer.parseInt(rowNumStr) - 1; + } else { + rowNum = nextRowNum; + } output.startRow(rowNum); } // c => cell @@ -343,7 +349,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { case NUMBER: String n = value.toString(); - if (this.formatString != null) + if (this.formatString != null && n.length() > 0) thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); else thisStr = n; @@ -370,6 +376,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler { // Finish up the row output.endRow(rowNum); + + // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well + nextRowNum = rowNum + 1; } else if ("sheetData".equals(name)) { // Handle any "missing" cells which had comments attached checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java index e48787be0..b56b3791f 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java @@ -23,6 +23,7 @@ import java.util.regex.Pattern; import junit.framework.TestCase; import org.apache.poi.POITextExtractor; +import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.xssf.XSSFTestDataSamples; @@ -155,7 +156,6 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { POITextExtractor[] extractors = new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; for (int i = 0; i < extractors.length; i++) { - @SuppressWarnings("resource") POITextExtractor extractor = extractors[i]; String text = extractor.getText().replaceAll("[\r\t]", ""); @@ -316,4 +316,25 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { fixture.close(); } } + + public void testFile56278_normal() throws Exception { + // first with normal Text Extractor + POIXMLTextExtractor extractor = new XSSFExcelExtractor( + XSSFTestDataSamples.openSampleWorkbook("56278.xlsx")); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } + + public void testFile56278_event() throws Exception { + // then with event based one + POIXMLTextExtractor extractor = getExtractor("56278.xlsx"); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } } diff --git a/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java index 22238d75f..c6ad03db2 100644 --- a/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java +++ b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java @@ -22,10 +22,12 @@ import java.io.IOException; import junit.framework.TestCase; import org.apache.poi.POIDataSamples; +import org.apache.poi.POITextExtractor; import org.apache.poi.hpsf.Thumbnail; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.hwpf.extractor.Word6Extractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; public final class TestHPSFPropertiesExtractor extends TestCase { @@ -34,45 +36,53 @@ public final class TestHPSFPropertiesExtractor extends TestCase { public void testNormalProperties() throws Exception { POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc")); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); - ext.getText(); - - // Check each bit in turn - String sinfText = ext.getSummaryInformationText(); - String dinfText = ext.getDocumentSummaryInformationText(); - - assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1); - assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1); - assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1); - assertTrue(dinfText.indexOf("COMPANY = sample company") > -1); - - // Now overall - String text = ext.getText(); - assertTrue(text.indexOf("TEMPLATE = Normal") > -1); - assertTrue(text.indexOf("SUBJECT = sample subject") > -1); - assertTrue(text.indexOf("MANAGER = sample manager") > -1); - assertTrue(text.indexOf("COMPANY = sample company") > -1); + try { + ext.getText(); + + // Check each bit in turn + String sinfText = ext.getSummaryInformationText(); + String dinfText = ext.getDocumentSummaryInformationText(); + + assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1); + assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1); + assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1); + assertTrue(dinfText.indexOf("COMPANY = sample company") > -1); + + // Now overall + String text = ext.getText(); + assertTrue(text.indexOf("TEMPLATE = Normal") > -1); + assertTrue(text.indexOf("SUBJECT = sample subject") > -1); + assertTrue(text.indexOf("MANAGER = sample manager") > -1); + assertTrue(text.indexOf("COMPANY = sample company") > -1); + } finally { + ext.close(); + } } public void testNormalUnicodeProperties() throws Exception { POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestUnicode.xls")); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); - ext.getText(); - - // Check each bit in turn - String sinfText = ext.getSummaryInformationText(); - String dinfText = ext.getDocumentSummaryInformationText(); - - assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1); - assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1); - assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1); - assertTrue(dinfText.indexOf("SCALE = false") > -1); - - // Now overall - String text = ext.getText(); - assertTrue(text.indexOf("AUTHOR = marshall") > -1); - assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1); - assertTrue(text.indexOf("COMPANY = Schreiner") > -1); - assertTrue(text.indexOf("SCALE = false") > -1); + try { + ext.getText(); + + // Check each bit in turn + String sinfText = ext.getSummaryInformationText(); + String dinfText = ext.getDocumentSummaryInformationText(); + + assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1); + assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1); + assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1); + assertTrue(dinfText.indexOf("SCALE = false") > -1); + + // Now overall + String text = ext.getText(); + assertTrue(text.indexOf("AUTHOR = marshall") > -1); + assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1); + assertTrue(text.indexOf("COMPANY = Schreiner") > -1); + assertTrue(text.indexOf("SCALE = false") > -1); + } finally { + ext.close(); + } } public void testCustomProperties() throws Exception { @@ -80,18 +90,21 @@ public final class TestHPSFPropertiesExtractor extends TestCase { _samples.openResourceAsStream("TestMickey.doc") ); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); - - // Custom properties are part of the document info stream - String dinfText = ext.getDocumentSummaryInformationText(); - assertTrue(dinfText.indexOf("Client = sample client") > -1); - assertTrue(dinfText.indexOf("Division = sample division") > -1); - - String text = ext.getText(); - assertTrue(text.indexOf("Client = sample client") > -1); - assertTrue(text.indexOf("Division = sample division") > -1); + try { + // Custom properties are part of the document info stream + String dinfText = ext.getDocumentSummaryInformationText(); + assertTrue(dinfText.indexOf("Client = sample client") > -1); + assertTrue(dinfText.indexOf("Division = sample division") > -1); + + String text = ext.getText(); + assertTrue(text.indexOf("Client = sample client") > -1); + assertTrue(text.indexOf("Division = sample division") > -1); + } finally { + ext.close(); + } } - public void testConstructors() { + public void testConstructors() throws IOException { POIFSFileSystem fs; HSSFWorkbook wb; try { @@ -102,9 +115,29 @@ public final class TestHPSFPropertiesExtractor extends TestCase { } ExcelExtractor excelExt = new ExcelExtractor(wb); - String fsText = (new HPSFPropertiesExtractor(fs)).getText(); - String hwText = (new HPSFPropertiesExtractor(wb)).getText(); - String eeText = (new HPSFPropertiesExtractor(excelExt)).getText(); + final String fsText; + HPSFPropertiesExtractor fsExt = new HPSFPropertiesExtractor(fs); + try { + fsText = fsExt.getText(); + } finally { + fsExt.close(); + } + + final String hwText; + HPSFPropertiesExtractor hwExt = new HPSFPropertiesExtractor(wb); + try { + hwText = hwExt.getText(); + } finally { + hwExt.close(); + } + + final String eeText; + HPSFPropertiesExtractor eeExt = new HPSFPropertiesExtractor(excelExt); + try { + eeText = eeExt.getText(); + } finally { + eeExt.close(); + } assertEquals(fsText, hwText); assertEquals(fsText, eeText); @@ -113,13 +146,17 @@ public final class TestHPSFPropertiesExtractor extends TestCase { assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1); } - public void test42726() { - HPSFPropertiesExtractor ex = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls")); - String txt = ex.getText(); - assertTrue(txt.indexOf("PID_AUTHOR") != -1); - assertTrue(txt.indexOf("PID_EDITTIME") != -1); - assertTrue(txt.indexOf("PID_REVNUMBER") != -1); - assertTrue(txt.indexOf("PID_THUMBNAIL") != -1); + public void test42726() throws IOException { + HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls")); + try { + String txt = ext.getText(); + assertTrue(txt.indexOf("PID_AUTHOR") != -1); + assertTrue(txt.indexOf("PID_EDITTIME") != -1); + assertTrue(txt.indexOf("PID_REVNUMBER") != -1); + assertTrue(txt.indexOf("PID_THUMBNAIL") != -1); + } finally { + ext.close(); + } } public void testThumbnail() throws Exception { @@ -131,4 +168,24 @@ public final class TestHPSFPropertiesExtractor extends TestCase { assertNotNull(thumbnail.getThumbnailAsWMF()); wb.close(); } + + public void testExtractorFromWord6Extractor() throws Exception { + POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc")); + Word6Extractor wExt = new Word6Extractor(fs); + try { + POITextExtractor ext = wExt.getMetadataTextExtractor(); + try { + // Now overall + String text = ext.getText(); + assertTrue(text.indexOf("TEMPLATE = Normal") > -1); + assertTrue(text.indexOf("SUBJECT = sample subject") > -1); + assertTrue(text.indexOf("MANAGER = sample manager") > -1); + assertTrue(text.indexOf("COMPANY = sample company") > -1); + } finally { + ext.close(); + } + } finally { + wExt.close(); + } + } } diff --git a/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java b/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java index 976633108..ff76cfa19 100644 --- a/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java +++ b/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java @@ -107,8 +107,6 @@ public final class TestHSSFEventFactory extends TestCase { POIFSFileSystem fs = new POIFSFileSystem(openSample("42844.xls")); HSSFEventFactory factory = new HSSFEventFactory(); factory.processWorkbookEvents(req, fs); - - assertTrue("no errors while processing the file", true); } private static class MockHSSFListener implements HSSFListener { @@ -125,4 +123,18 @@ public final class TestHSSFEventFactory extends TestCase { records.add(record); } } + + public void testWithDifferentWorkbookName() throws Exception { + HSSFRequest req = new HSSFRequest(); + MockHSSFListener mockListen = new MockHSSFListener(); + req.addListenerForAllRecords(mockListen); + + POIFSFileSystem fs = new POIFSFileSystem(openSample("BOOK_in_capitals.xls")); + HSSFEventFactory factory = new HSSFEventFactory(); + factory.processWorkbookEvents(req, fs); + + fs = new POIFSFileSystem(openSample("WORKBOOK_in_capitals.xls")); + factory = new HSSFEventFactory(); + factory.processWorkbookEvents(req, fs); + } } diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index de8221029..f7584ff11 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -46,15 +46,18 @@ public final class TestExcelExtractor extends TestCase { } - public void testSimple() { - + public void testSimple() throws IOException { ExcelExtractor extractor = createExtractor("Simple.xls"); - assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText()); - - // Now turn off sheet names - extractor.setIncludeSheetNames(false); - assertEquals("replaceMe\n", extractor.getText()); + try { + assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText()); + + // Now turn off sheet names + extractor.setIncludeSheetNames(false); + assertEquals("replaceMe\n", extractor.getText()); + } finally { + extractor.close(); + } } public void testNumericFormula() { @@ -126,45 +129,47 @@ public final class TestExcelExtractor extends TestCase { public void testEventExtractor() throws Exception { - EventBasedExcelExtractor extractor; - // First up, a simple file with string // based formulas in it - extractor = new EventBasedExcelExtractor( + EventBasedExcelExtractor extractor = new EventBasedExcelExtractor( new POIFSFileSystem( HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls") ) ); - extractor.setIncludeSheetNames(true); - - String text = extractor.getText(); - assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); - - extractor.setIncludeSheetNames(false); - extractor.setFormulasNotResults(true); - - text = extractor.getText(); - assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); - - - // Now, a slightly longer file with numeric formulas - extractor = new EventBasedExcelExtractor( - new POIFSFileSystem( - HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") - ) - ); - extractor.setIncludeSheetNames(false); - extractor.setFormulasNotResults(true); - - text = extractor.getText(); - assertEquals( - "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + - "2000\t2\n" + - "3000\t3\n" + - "4000\t4\n" + - "5000\t5\n", - text - ); + try { + extractor.setIncludeSheetNames(true); + + String text = extractor.getText(); + assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); + + extractor.setIncludeSheetNames(false); + extractor.setFormulasNotResults(true); + + text = extractor.getText(); + assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); + + + // Now, a slightly longer file with numeric formulas + extractor = new EventBasedExcelExtractor( + new POIFSFileSystem( + HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") + ) + ); + extractor.setIncludeSheetNames(false); + extractor.setFormulasNotResults(true); + + text = extractor.getText(); + assertEquals( + "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + + "2000\t2\n" + + "3000\t3\n" + + "4000\t4\n" + + "5000\t5\n", + text + ); + } finally { + extractor.close(); + } } public void testWithComments() { @@ -272,15 +277,22 @@ public final class TestExcelExtractor extends TestCase { HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); ExcelExtractor exA = new ExcelExtractor(wbA); - ExcelExtractor exB = new ExcelExtractor(wbB); - - assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", - exA.getText()); - assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); - - assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", - exB.getText()); - assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); + try { + ExcelExtractor exB = new ExcelExtractor(wbB); + try { + assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", + exA.getText()); + assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); + + assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", + exB.getText()); + assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); + } finally { + exB.close(); + } + } finally { + exA.close(); + } } /** @@ -299,21 +311,32 @@ public final class TestExcelExtractor extends TestCase { HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); ExcelExtractor exA = new ExcelExtractor(wbA); - ExcelExtractor exB = new ExcelExtractor(wbB); - - assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", - exA.getText()); - assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); - - assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", - exB.getText()); - assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); - - // And the base file too - ExcelExtractor ex = new ExcelExtractor(fs); - assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", - ex.getText()); - assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); + try { + ExcelExtractor exB = new ExcelExtractor(wbB); + try { + assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", + exA.getText()); + assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); + + assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", + exB.getText()); + assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); + + // And the base file too + ExcelExtractor ex = new ExcelExtractor(fs); + try { + assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", + ex.getText()); + assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); + } finally { + ex.close(); + } + } finally { + exB.close(); + } + } finally { + exA.close(); + } } /**