Refactor to pull out the list of Excel 97+ directory entry names to a common place, avoiding duplication. Also makes a start on a unit test for bug #59074
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1732579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
41d693fe15
commit
0b8283b37c
@ -25,6 +25,7 @@ import org.apache.poi.hssf.eventusermodel.HSSFUserException;
|
||||
import org.apache.poi.hssf.record.*;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
|
||||
|
||||
/**
|
||||
* Low level event based HSSF reader. Pass either a DocumentInputStream to
|
||||
@ -59,20 +60,20 @@ public class HSSFEventFactory {
|
||||
*/
|
||||
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
|
||||
// some old documents have "WORKBOOK" or "BOOK"
|
||||
final String name;
|
||||
String name = null;
|
||||
Set<String> entryNames = dir.getEntryNames();
|
||||
if (entryNames.contains("Workbook")) {
|
||||
name = "Workbook";
|
||||
} else if (entryNames.contains("WORKBOOK")) {
|
||||
name = "WORKBOOK";
|
||||
} else if (entryNames.contains("BOOK")) {
|
||||
name = "BOOK";
|
||||
} else {
|
||||
name = "Workbook";
|
||||
for (String potentialName : WORKBOOK_DIR_ENTRY_NAMES) {
|
||||
if (entryNames.contains(potentialName)) {
|
||||
name = potentialName;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If in doubt, go for the default
|
||||
if (name == null) {
|
||||
name = WORKBOOK_DIR_ENTRY_NAMES[0];
|
||||
}
|
||||
|
||||
InputStream in = dir.createDocumentInputStream(name);
|
||||
|
||||
processEvents(req, in);
|
||||
}
|
||||
|
||||
|
@ -123,6 +123,16 @@ public final class InternalWorkbook {
|
||||
*/
|
||||
private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31;
|
||||
|
||||
/**
|
||||
* Normally, the Workbook will be in a POIFS Stream called
|
||||
* "Workbook". However, some weird XLS generators use "WORKBOOK"
|
||||
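* or "BOOK". The names are listed in order of preference: callers
* try them in sequence and treat the first element as the default.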
|
||||
*/
|
||||
public static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
|
||||
"Workbook", // as per BIFF8 spec
|
||||
"WORKBOOK", // Typically from third party programs
|
||||
"BOOK", // Typically odd Crystal Reports exports
|
||||
};
|
||||
|
||||
private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class);
|
||||
private static final int DEBUG = POILogger.DEBUG;
|
||||
|
@ -17,6 +17,8 @@
|
||||
|
||||
package org.apache.poi.hssf.usermodel;
|
||||
|
||||
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
@ -95,7 +97,6 @@ import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
|
||||
/**
|
||||
* High level representation of a workbook. This is the first object most users
|
||||
* will construct whether they are reading or writing a workbook. It is also the
|
||||
@ -243,17 +244,6 @@ public final class HSSFWorkbook extends POIDocument implements org.apache.poi.ss
|
||||
this(fs.getRoot(), fs, preserveNodes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Normally, the Workbook will be in a POIFS Stream
|
||||
* called "Workbook". However, some weird XLS generators use "WORKBOOK"
|
||||
*/
|
||||
private static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
|
||||
"Workbook", // as per BIFF8 spec
|
||||
"WORKBOOK", // Typically from third party programs
|
||||
"BOOK", // Typically odd Crystal Reports exports
|
||||
};
|
||||
|
||||
|
||||
public static String getWorkbookDirEntryName(DirectoryNode directory) {
|
||||
|
||||
for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) {
|
||||
|
@ -66,6 +66,8 @@ import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
|
||||
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
|
||||
|
||||
/**
|
||||
* Figures out the correct POITextExtractor for your supplied
|
||||
* document, and returns it.
|
||||
@ -301,13 +303,13 @@ public class ExtractorFactory {
|
||||
{
|
||||
// Look for certain entries in the stream, to figure it
|
||||
// out from
|
||||
if (poifsDir.hasEntry("Workbook") ||
|
||||
// some XLS files have different entry-names
|
||||
poifsDir.hasEntry("WORKBOOK") || poifsDir.hasEntry("BOOK")) {
|
||||
if (getPreferEventExtractor()) {
|
||||
return new EventBasedExcelExtractor(poifsDir);
|
||||
for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
|
||||
if (poifsDir.hasEntry(workbookName)) {
|
||||
if (getPreferEventExtractor()) {
|
||||
return new EventBasedExcelExtractor(poifsDir);
|
||||
}
|
||||
return new ExcelExtractor(poifsDir);
|
||||
}
|
||||
return new ExcelExtractor(poifsDir);
|
||||
}
|
||||
|
||||
if (poifsDir.hasEntry("WordDocument")) {
|
||||
|
@ -54,6 +54,7 @@ import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
||||
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
@ -920,4 +921,23 @@ public class TestExtractorFactory {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* #59074 - No supported documents found in the OLE2 stream on
|
||||
* a valid Excel file
|
||||
*/
|
||||
@Ignore
|
||||
@Test
|
||||
public void a() throws Exception {
|
||||
POITextExtractor ext = ExtractorFactory.createExtractor(
|
||||
POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
|
||||
assertNotNull(ext);
|
||||
|
||||
String text = ext.getText();
|
||||
ext.close();
|
||||
|
||||
System.err.println(text);
|
||||
assertNotNull(text);
|
||||
assertTrue(text.contains("test"));
|
||||
}
|
||||
}
|
||||
|
@ -36,6 +36,7 @@ public class TestBiffViewer extends BaseXLSIteratingTest {
|
||||
EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well
|
||||
EXCLUDED.add("password.xls");
|
||||
EXCLUDED.add("46904.xls");
|
||||
EXCLUDED.add("59074.xls"); // Biff 5 / Excel 95
|
||||
EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header
|
||||
EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption
|
||||
EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2
|
||||
|
Loading…
Reference in New Issue
Block a user