#59074 More helpful exception if Excel 1-95 files are given to ExtractorFactory

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1732587 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2016-02-26 23:56:31 +00:00
parent 856261f4d0
commit 62b5b0d874
3 changed files with 12 additions and 21 deletions

View File

@ -17,8 +17,8 @@
package org.apache.poi.hssf.extractor; package org.apache.poi.hssf.extractor;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
import static org.apache.poi.hssf.model.InternalWorkbook.OLD_WORKBOOK_DIR_ENTRY_NAME; import static org.apache.poi.hssf.model.InternalWorkbook.OLD_WORKBOOK_DIR_ENTRY_NAME;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.Closeable; import java.io.Closeable;

View File

@ -38,9 +38,9 @@ import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hsmf.MAPIMessage; import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.extractor.OutlookTextExtactor; import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor; import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.extractor.OldExcelExtractor;
import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor; import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.hwpf.extractor.WordExtractor;
@ -314,7 +314,8 @@ public class ExtractorFactory {
} }
} }
if (poifsDir.hasEntry(OLD_WORKBOOK_DIR_ENTRY_NAME)) { if (poifsDir.hasEntry(OLD_WORKBOOK_DIR_ENTRY_NAME)) {
throw new IllegalArgumentException("Excel 1-95 file found, call OldExcelExtractor directly"); throw new OldExcelFormatException("Old Excel Spreadsheet format (1-95) "
+ "found. Please call OldExcelExtractor directly for basic text extraction");
} }
if (poifsDir.hasEntry("WordDocument")) { if (poifsDir.hasEntry("WordDocument")) {

View File

@ -26,8 +26,6 @@ import static org.junit.Assert.fail;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POIOLE2TextExtractor;
@ -38,12 +36,12 @@ import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.hslf.extractor.PowerPointExtractor; import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hsmf.extractor.OutlookTextExtactor; import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor; import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hwpf.extractor.Word6Extractor; import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@ -52,9 +50,7 @@ import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor; import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
/** /**
@ -923,21 +919,15 @@ public class TestExtractorFactory {
} }
/** /**
* #59074 - No supported documents found in the OLE2 stream on * #59074 - Excel 95 files should give a helpful message, not just
* a valid Excel file * "No supported documents found in the OLE2 stream"
*/ */
@Ignore
@Test @Test
public void a() throws Exception { public void a() throws Exception {
POITextExtractor ext = ExtractorFactory.createExtractor( try {
POIDataSamples.getSpreadSheetInstance().getFile("59074.xls")); ExtractorFactory.createExtractor(
assertNotNull(ext); POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
fail("Old excel formats not supported via ExtractorFactory");
String text = ext.getText(); } catch (OldExcelFormatException e) {}
ext.close();
System.err.println(text);
assertNotNull(text);
assertTrue(text.contains("test"));
} }
} }