Refactor to pull out the list of Excel 97+ directory entry names to a common place, avoiding duplication. Also starts on unit testing #59074

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1732579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2016-02-26 23:32:17 +00:00
parent 41d693fe15
commit 0b8283b37c
6 changed files with 52 additions and 28 deletions

View File

@ -25,6 +25,7 @@ import org.apache.poi.hssf.eventusermodel.HSSFUserException;
import org.apache.poi.hssf.record.*; import org.apache.poi.hssf.record.*;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
/** /**
* Low level event based HSSF reader. Pass either a DocumentInputStream to * Low level event based HSSF reader. Pass either a DocumentInputStream to
@ -59,20 +60,20 @@ public class HSSFEventFactory {
*/ */
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
// some old documents have "WORKBOOK" or "BOOK" // some old documents have "WORKBOOK" or "BOOK"
final String name; String name = null;
Set<String> entryNames = dir.getEntryNames(); Set<String> entryNames = dir.getEntryNames();
if (entryNames.contains("Workbook")) { for (String potentialName : WORKBOOK_DIR_ENTRY_NAMES) {
name = "Workbook"; if (entryNames.contains(potentialName)) {
} else if (entryNames.contains("WORKBOOK")) { name = potentialName;
name = "WORKBOOK"; break;
} else if (entryNames.contains("BOOK")) { }
name = "BOOK"; }
} else { // If in doubt, go for the default
name = "Workbook"; if (name == null) {
name = WORKBOOK_DIR_ENTRY_NAMES[0];
} }
InputStream in = dir.createDocumentInputStream(name); InputStream in = dir.createDocumentInputStream(name);
processEvents(req, in); processEvents(req, in);
} }

View File

@ -123,6 +123,16 @@ public final class InternalWorkbook {
*/ */
private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31; private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31;
/**
* Candidate names of the POIFS stream (directory entry) that holds
* the workbook, listed in order of preference.
* <p>
* Normally, the Workbook will be in a POIFS Stream called
* "Workbook". However, some weird XLS generators use "WORKBOOK"
* or "BOOK".
* <p>
* The order is significant: callers such as HSSFEventFactory probe
* the names in array order and use the first one that is present,
* and fall back to the first element when none of them is found.
* <p>
* NOTE(review): this is a public array, so its elements can be
* reassigned by any caller; treat it as read-only.
*/
public static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
"Workbook", // as per BIFF8 spec
"WORKBOOK", // Typically from third party programs
"BOOK", // Typically odd Crystal Reports exports
};
private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class); private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class);
private static final int DEBUG = POILogger.DEBUG; private static final int DEBUG = POILogger.DEBUG;

View File

@ -17,6 +17,8 @@
package org.apache.poi.hssf.usermodel; package org.apache.poi.hssf.usermodel;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -95,7 +97,6 @@ import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger; import org.apache.poi.util.POILogger;
/** /**
* High level representation of a workbook. This is the first object most users * High level representation of a workbook. This is the first object most users
* will construct whether they are reading or writing a workbook. It is also the * will construct whether they are reading or writing a workbook. It is also the
@ -243,17 +244,6 @@ public final class HSSFWorkbook extends POIDocument implements org.apache.poi.ss
this(fs.getRoot(), fs, preserveNodes); this(fs.getRoot(), fs, preserveNodes);
} }
/**
* Normally, the Workbook will be in a POIFS Stream
* called "Workbook". However, some weird XLS generators use "WORKBOOK"
*/
private static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
"Workbook", // as per BIFF8 spec
"WORKBOOK", // Typically from third party programs
"BOOK", // Typically odd Crystal Reports exports
};
public static String getWorkbookDirEntryName(DirectoryNode directory) { public static String getWorkbookDirEntryName(DirectoryNode directory) {
for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) { for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) {

View File

@ -66,6 +66,8 @@ import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFRelation; import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlException;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
/** /**
* Figures out the correct POITextExtractor for your supplied * Figures out the correct POITextExtractor for your supplied
* document, and returns it. * document, and returns it.
@ -301,14 +303,14 @@ public class ExtractorFactory {
{ {
// Look for certain entries in the stream, to figure it // Look for certain entries in the stream, to figure it
// out from // out from
if (poifsDir.hasEntry("Workbook") || for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
// some XLS files have different entry-names if (poifsDir.hasEntry(workbookName)) {
poifsDir.hasEntry("WORKBOOK") || poifsDir.hasEntry("BOOK")) {
if (getPreferEventExtractor()) { if (getPreferEventExtractor()) {
return new EventBasedExcelExtractor(poifsDir); return new EventBasedExcelExtractor(poifsDir);
} }
return new ExcelExtractor(poifsDir); return new ExcelExtractor(poifsDir);
} }
}
if (poifsDir.hasEntry("WordDocument")) { if (poifsDir.hasEntry("WordDocument")) {
// Old or new style word document? // Old or new style word document?

View File

@ -54,6 +54,7 @@ import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlException;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
/** /**
@ -920,4 +921,23 @@ public class TestExtractorFactory {
} }
} }
} }
/**
* #59074 - No supported documents found in the OLE2 stream on
* a valid Excel file.
* <p>
* Asks {@link ExtractorFactory} for an extractor for the sample
* spreadsheet 59074.xls and checks that the extracted text is
* non-null and contains "test".
* <p>
* Currently disabled via {@code @Ignore}; presumably until the
* underlying problem in #59074 is fixed (TODO confirm).
*
* @throws Exception if the sample file cannot be opened or read
*/
@Ignore
@Test
public void a() throws Exception {
    POITextExtractor ext = ExtractorFactory.createExtractor(
            POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
    assertNotNull(ext);

    final String text;
    try {
        text = ext.getText();
    } finally {
        // Always close the extractor, even if text extraction throws
        ext.close();
    }

    assertNotNull(text);
    assertTrue(text.contains("test"));
}
} }

View File

@ -36,6 +36,7 @@ public class TestBiffViewer extends BaseXLSIteratingTest {
EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well
EXCLUDED.add("password.xls"); EXCLUDED.add("password.xls");
EXCLUDED.add("46904.xls"); EXCLUDED.add("46904.xls");
EXCLUDED.add("59074.xls"); // Biff 5 / Excel 95
EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header
EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption
EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2 EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2