#59074 More helpful exception if Excel 1-95 files are given to ExtractorFactory

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1732587 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2016-02-26 23:56:31 +00:00
parent 856261f4d0
commit 62b5b0d874
3 changed files with 12 additions and 21 deletions

View File

@ -17,8 +17,8 @@
package org.apache.poi.hssf.extractor;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
import static org.apache.poi.hssf.model.InternalWorkbook.OLD_WORKBOOK_DIR_ENTRY_NAME;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
import java.io.BufferedInputStream;
import java.io.Closeable;

View File

@ -38,9 +38,9 @@ import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.extractor.OldExcelExtractor;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
@ -314,7 +314,8 @@ public class ExtractorFactory {
}
}
if (poifsDir.hasEntry(OLD_WORKBOOK_DIR_ENTRY_NAME)) {
throw new IllegalArgumentException("Excel 1-95 file found, call OldExcelExtractor directly");
throw new OldExcelFormatException("Old Excel Spreadsheet format (1-95) "
+ "found. Please call OldExcelExtractor directly for basic text extraction");
}
if (poifsDir.hasEntry("WordDocument")) {

View File

@ -26,8 +26,6 @@ import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.poi.POIDataSamples;
import org.apache.poi.POIOLE2TextExtractor;
@ -38,12 +36,12 @@ import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@ -52,9 +50,7 @@ import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
/**
@ -923,21 +919,15 @@ public class TestExtractorFactory {
}
/**
* #59074 - No supported documents found in the OLE2 stream on
* a valid Excel file
* #59074 - Excel 95 files should give a helpful message, not just
* "No supported documents found in the OLE2 stream"
*/
@Ignore
@Test
public void a() throws Exception {
// NOTE(review): this span looks like a diff rendered without +/- markers, so the
// statements down to assertTrue(...) are presumably the pre-change body and the
// try/catch further down the post-change body -- confirm against the checked-in file.
// Asks ExtractorFactory for an extractor for the "59074.xls" spreadsheet sample.
POITextExtractor ext = ExtractorFactory.createExtractor(
POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
assertNotNull(ext);
String text = ext.getText();
ext.close();
// Dumps the extracted text to stderr before asserting that it contains "test".
System.err.println(text);
assertNotNull(text);
assertTrue(text.contains("test"));
// Requests an extractor for the same sample and expects the call to throw:
// reaching fail(...) means no exception was raised. OldExcelFormatException is
// the expected outcome and is deliberately ignored by the empty catch block.
// (Presumably "59074.xls" is an Excel 95 file, per the bug reference -- verify.)
try {
ExtractorFactory.createExtractor(
POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
fail("Old excel formats not supported via ExtractorFactory");
} catch (OldExcelFormatException e) {}
}
}