Refactor to pull the list of Excel 97+ workbook directory entry names out into a common place, avoiding duplication. Also starts on a unit test for bug #59074

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1732579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2016-02-26 23:32:17 +00:00
parent 41d693fe15
commit 0b8283b37c
6 changed files with 52 additions and 28 deletions

View File

@ -25,6 +25,7 @@ import org.apache.poi.hssf.eventusermodel.HSSFUserException;
import org.apache.poi.hssf.record.*;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
/**
* Low level event based HSSF reader. Pass either a DocumentInputStream to
@ -59,20 +60,20 @@ public class HSSFEventFactory {
*/
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
// some old documents have "WORKBOOK" or "BOOK"
final String name;
String name = null;
Set<String> entryNames = dir.getEntryNames();
if (entryNames.contains("Workbook")) {
name = "Workbook";
} else if (entryNames.contains("WORKBOOK")) {
name = "WORKBOOK";
} else if (entryNames.contains("BOOK")) {
name = "BOOK";
} else {
name = "Workbook";
for (String potentialName : WORKBOOK_DIR_ENTRY_NAMES) {
if (entryNames.contains(potentialName)) {
name = potentialName;
break;
}
}
// If in doubt, go for the default
if (name == null) {
name = WORKBOOK_DIR_ENTRY_NAMES[0];
}
InputStream in = dir.createDocumentInputStream(name);
processEvents(req, in);
}

View File

@ -123,6 +123,16 @@ public final class InternalWorkbook {
*/
private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31;
/**
* Names of the POIFS directory entry that may hold the Workbook stream.
* Normally, the Workbook will be in a POIFS Stream called
* "Workbook". However, some weird XLS generators use "WORKBOOK"
* or "BOOK".
* <p>
* Order is significant: lookups such as
* HSSFEventFactory#processWorkbookEvents try the names in array order,
* stop at the first match, and fall back to the first element when
* none of the names is present.
*/
public static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
"Workbook", // as per BIFF8 spec
"WORKBOOK", // Typically from third party programs
"BOOK", // Typically odd Crystal Reports exports
};
private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class);
private static final int DEBUG = POILogger.DEBUG;

View File

@ -17,6 +17,8 @@
package org.apache.poi.hssf.usermodel;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
@ -95,7 +97,6 @@ import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* High level representation of a workbook. This is the first object most users
* will construct whether they are reading or writing a workbook. It is also the
@ -243,17 +244,6 @@ public final class HSSFWorkbook extends POIDocument implements org.apache.poi.ss
this(fs.getRoot(), fs, preserveNodes);
}
/**
* Normally, the Workbook will be in a POIFS Stream
* called "Workbook". However, some weird XLS generators use "WORKBOOK"
*/
private static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
"Workbook", // as per BIFF8 spec
"WORKBOOK", // Typically from third party programs
"BOOK", // Typically odd Crystal Reports exports
};
public static String getWorkbookDirEntryName(DirectoryNode directory) {
for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) {

View File

@ -66,6 +66,8 @@ import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.apache.xmlbeans.XmlException;
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
/**
* Figures out the correct POITextExtractor for your supplied
* document, and returns it.
@ -301,13 +303,13 @@ public class ExtractorFactory {
{
// Look for certain entries in the stream, to figure it
// out from
if (poifsDir.hasEntry("Workbook") ||
// some XLS files have different entry-names
poifsDir.hasEntry("WORKBOOK") || poifsDir.hasEntry("BOOK")) {
if (getPreferEventExtractor()) {
return new EventBasedExcelExtractor(poifsDir);
for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
if (poifsDir.hasEntry(workbookName)) {
if (getPreferEventExtractor()) {
return new EventBasedExcelExtractor(poifsDir);
}
return new ExcelExtractor(poifsDir);
}
return new ExcelExtractor(poifsDir);
}
if (poifsDir.hasEntry("WordDocument")) {

View File

@ -54,6 +54,7 @@ import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
/**
@ -920,4 +921,23 @@ public class TestExtractorFactory {
}
}
}
/**
 * #59074 - No supported documents found in the OLE2 stream on
 * a valid Excel file.
 * <p>
 * Creates an extractor for the sample file "59074.xls" and checks
 * that the extracted text is non-null and contains "test".
 */
// NOTE(review): presumably ignored until #59074 is actually fixed - confirm
@Ignore
@Test
public void a() throws Exception {
    POITextExtractor ext = ExtractorFactory.createExtractor(
            POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
    assertNotNull(ext);

    String text;
    try {
        text = ext.getText();
    } finally {
        // Always release the extractor, even when text extraction throws
        ext.close();
    }

    assertNotNull(text);
    assertTrue(text.contains("test"));
}
}

View File

@ -36,6 +36,7 @@ public class TestBiffViewer extends BaseXLSIteratingTest {
EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well
EXCLUDED.add("password.xls");
EXCLUDED.add("46904.xls");
EXCLUDED.add("59074.xls"); // Biff 5 / Excel 95
EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header
EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption
EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2