Refactor to pull out the list of Excel 97+ directory entry names to a common place, avoiding duplication. Also starts on a unit test for #59074
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1732579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
41d693fe15
commit
0b8283b37c
@ -25,6 +25,7 @@ import org.apache.poi.hssf.eventusermodel.HSSFUserException;
|
|||||||
import org.apache.poi.hssf.record.*;
|
import org.apache.poi.hssf.record.*;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Low level event based HSSF reader. Pass either a DocumentInputStream to
|
* Low level event based HSSF reader. Pass either a DocumentInputStream to
|
||||||
@ -59,20 +60,20 @@ public class HSSFEventFactory {
|
|||||||
*/
|
*/
|
||||||
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
|
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
|
||||||
// some old documents have "WORKBOOK" or "BOOK"
|
// some old documents have "WORKBOOK" or "BOOK"
|
||||||
final String name;
|
String name = null;
|
||||||
Set<String> entryNames = dir.getEntryNames();
|
Set<String> entryNames = dir.getEntryNames();
|
||||||
if (entryNames.contains("Workbook")) {
|
for (String potentialName : WORKBOOK_DIR_ENTRY_NAMES) {
|
||||||
name = "Workbook";
|
if (entryNames.contains(potentialName)) {
|
||||||
} else if (entryNames.contains("WORKBOOK")) {
|
name = potentialName;
|
||||||
name = "WORKBOOK";
|
break;
|
||||||
} else if (entryNames.contains("BOOK")) {
|
}
|
||||||
name = "BOOK";
|
}
|
||||||
} else {
|
// If in doubt, go for the default
|
||||||
name = "Workbook";
|
if (name == null) {
|
||||||
|
name = WORKBOOK_DIR_ENTRY_NAMES[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
InputStream in = dir.createDocumentInputStream(name);
|
InputStream in = dir.createDocumentInputStream(name);
|
||||||
|
|
||||||
processEvents(req, in);
|
processEvents(req, in);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,6 +123,16 @@ public final class InternalWorkbook {
|
|||||||
*/
|
*/
|
||||||
private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31;
|
private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normally, the Workbook will be in a POIFS Stream called
|
||||||
|
* "Workbook". However, some weird XLS generators use "WORKBOOK"
|
||||||
|
* or "BOOK".
|
||||||
|
*/
|
||||||
|
public static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
|
||||||
|
"Workbook", // as per BIFF8 spec
|
||||||
|
"WORKBOOK", // Typically from third party programs
|
||||||
|
"BOOK", // Typically odd Crystal Reports exports
|
||||||
|
};
|
||||||
|
|
||||||
private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class);
|
private static final POILogger log = POILogFactory.getLogger(InternalWorkbook.class);
|
||||||
private static final int DEBUG = POILogger.DEBUG;
|
private static final int DEBUG = POILogger.DEBUG;
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
|
|
||||||
package org.apache.poi.hssf.usermodel;
|
package org.apache.poi.hssf.usermodel;
|
||||||
|
|
||||||
|
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
@ -95,7 +97,6 @@ import org.apache.poi.util.LittleEndian;
|
|||||||
import org.apache.poi.util.POILogFactory;
|
import org.apache.poi.util.POILogFactory;
|
||||||
import org.apache.poi.util.POILogger;
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* High level representation of a workbook. This is the first object most users
|
* High level representation of a workbook. This is the first object most users
|
||||||
* will construct whether they are reading or writing a workbook. It is also the
|
* will construct whether they are reading or writing a workbook. It is also the
|
||||||
@ -243,17 +244,6 @@ public final class HSSFWorkbook extends POIDocument implements org.apache.poi.ss
|
|||||||
this(fs.getRoot(), fs, preserveNodes);
|
this(fs.getRoot(), fs, preserveNodes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Normally, the Workbook will be in a POIFS Stream
|
|
||||||
* called "Workbook". However, some weird XLS generators use "WORKBOOK"
|
|
||||||
*/
|
|
||||||
private static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
|
|
||||||
"Workbook", // as per BIFF8 spec
|
|
||||||
"WORKBOOK", // Typically from third party programs
|
|
||||||
"BOOK", // Typically odd Crystal Reports exports
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
public static String getWorkbookDirEntryName(DirectoryNode directory) {
|
public static String getWorkbookDirEntryName(DirectoryNode directory) {
|
||||||
|
|
||||||
for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) {
|
for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) {
|
||||||
|
@ -66,6 +66,8 @@ import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
|||||||
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
|
|
||||||
|
import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Figures out the correct POITextExtractor for your supplied
|
* Figures out the correct POITextExtractor for your supplied
|
||||||
* document, and returns it.
|
* document, and returns it.
|
||||||
@ -301,14 +303,14 @@ public class ExtractorFactory {
|
|||||||
{
|
{
|
||||||
// Look for certain entries in the stream, to figure it
|
// Look for certain entries in the stream, to figure it
|
||||||
// out from
|
// out from
|
||||||
if (poifsDir.hasEntry("Workbook") ||
|
for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
|
||||||
// some XLS files have different entry-names
|
if (poifsDir.hasEntry(workbookName)) {
|
||||||
poifsDir.hasEntry("WORKBOOK") || poifsDir.hasEntry("BOOK")) {
|
|
||||||
if (getPreferEventExtractor()) {
|
if (getPreferEventExtractor()) {
|
||||||
return new EventBasedExcelExtractor(poifsDir);
|
return new EventBasedExcelExtractor(poifsDir);
|
||||||
}
|
}
|
||||||
return new ExcelExtractor(poifsDir);
|
return new ExcelExtractor(poifsDir);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (poifsDir.hasEntry("WordDocument")) {
|
if (poifsDir.hasEntry("WordDocument")) {
|
||||||
// Old or new style word document?
|
// Old or new style word document?
|
||||||
|
@ -54,6 +54,7 @@ import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
|||||||
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -920,4 +921,23 @@ public class TestExtractorFactory {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* #59074 - No supported documents found in the OLE2 stream on
|
||||||
|
* a valid Excel file
|
||||||
|
*/
|
||||||
|
@Ignore
|
||||||
|
@Test
|
||||||
|
public void a() throws Exception {
|
||||||
|
POITextExtractor ext = ExtractorFactory.createExtractor(
|
||||||
|
POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
|
||||||
|
assertNotNull(ext);
|
||||||
|
|
||||||
|
String text = ext.getText();
|
||||||
|
ext.close();
|
||||||
|
|
||||||
|
System.err.println(text);
|
||||||
|
assertNotNull(text);
|
||||||
|
assertTrue(text.contains("test"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,7 @@ public class TestBiffViewer extends BaseXLSIteratingTest {
|
|||||||
EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well
|
EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well
|
||||||
EXCLUDED.add("password.xls");
|
EXCLUDED.add("password.xls");
|
||||||
EXCLUDED.add("46904.xls");
|
EXCLUDED.add("46904.xls");
|
||||||
|
EXCLUDED.add("59074.xls"); // Biff 5 / Excel 95
|
||||||
EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header
|
EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header
|
||||||
EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption
|
EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption
|
||||||
EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2
|
EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2
|
||||||
|
Loading…
Reference in New Issue
Block a user