Before opening an OOXML file, check the first few bytes, and give helpful exceptions for common mistakes
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1735069 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
700fe05e34
commit
7112835d90
@ -28,6 +28,7 @@ import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
||||
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
|
||||
@ -194,6 +195,7 @@ public final class ZipPackage extends Package {
|
||||
// At this point, we should have loaded the content type part
|
||||
if (this.contentTypeManager == null) {
|
||||
// Is it a different Zip-based format?
|
||||
int numEntries = 0;
|
||||
boolean hasMimetype = false;
|
||||
boolean hasSettingsXML = false;
|
||||
entries = this.zipArchive.getEntries();
|
||||
@ -205,12 +207,18 @@ public final class ZipPackage extends Package {
|
||||
if (entry.getName().equals("settings.xml")) {
|
||||
hasSettingsXML = true;
|
||||
}
|
||||
numEntries++;
|
||||
}
|
||||
if (hasMimetype && hasSettingsXML) {
|
||||
throw new ODFNotOfficeXmlFileException(
|
||||
"The supplied data appears to be in ODF (Open Document) Format. " +
|
||||
"Formats like these (eg ODS, ODP) are not supported, try Apache ODFToolkit");
|
||||
}
|
||||
if (numEntries == 0) {
|
||||
throw new NotOfficeXmlFileException(
|
||||
"No valid entries or contents found, this is not a valid OOXML " +
|
||||
"(Office Open XML) file");
|
||||
}
|
||||
|
||||
// Fallback exception
|
||||
throw new InvalidFormatException(
|
||||
|
@ -18,8 +18,10 @@
|
||||
package org.apache.poi.openxml4j.opc.internal;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.Enumeration;
|
||||
@ -27,12 +29,18 @@ import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipFile;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.apache.poi.openxml4j.opc.ZipPackage;
|
||||
import org.apache.poi.openxml4j.util.ZipSecureFile;
|
||||
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
public final class ZipHelper {
|
||||
/**
|
||||
@ -144,6 +152,67 @@ public final class ZipHelper {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that the given stream starts with a Zip structure.
|
||||
*
|
||||
* Warning - this will consume the first few bytes of the stream,
|
||||
* you should push-back or reset the stream after use!
|
||||
*/
|
||||
public static void verifyZipHeader(InputStream stream)
|
||||
throws NotOfficeXmlFileException, IOException {
|
||||
// Grab the first 8 bytes
|
||||
byte[] data = new byte[8];
|
||||
IOUtils.readFully(stream, data);
|
||||
|
||||
// OLE2?
|
||||
long signature = LittleEndian.getLong(data);
|
||||
if (signature == HeaderBlockConstants._signature) {
|
||||
throw new OLE2NotOfficeXmlFileException(
|
||||
"The supplied data appears to be in the OLE2 Format. " +
|
||||
"You are calling the part of POI that deals with OOXML "+
|
||||
"(Office Open XML) Documents. You need to call a different " +
|
||||
"part of POI to process this data (eg HSSF instead of XSSF)");
|
||||
}
|
||||
|
||||
// Raw XML?
|
||||
byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
|
||||
if (data[0] == RAW_XML_FILE_HEADER[0] &&
|
||||
data[1] == RAW_XML_FILE_HEADER[1] &&
|
||||
data[2] == RAW_XML_FILE_HEADER[2] &&
|
||||
data[3] == RAW_XML_FILE_HEADER[3] &&
|
||||
data[4] == RAW_XML_FILE_HEADER[4]) {
|
||||
throw new NotOfficeXmlFileException(
|
||||
"The supplied data appears to be a raw XML file. " +
|
||||
"Formats such as Office 2003 XML are not supported");
|
||||
}
|
||||
|
||||
// Don't check for a Zip header, as to maintain backwards
|
||||
// compatibility we need to let them seek over junk at the
|
||||
// start before beginning processing.
|
||||
|
||||
// Put things back
|
||||
if (stream instanceof PushbackInputStream) {
|
||||
((PushbackInputStream)stream).unread(data);
|
||||
} else if (stream.markSupported()) {
|
||||
stream.reset();
|
||||
} else if (stream instanceof FileInputStream) {
|
||||
// File open check, about to be closed, nothing to do
|
||||
} else {
|
||||
// Oh dear... I hope you know what you're doing!
|
||||
}
|
||||
}
|
||||
|
||||
private static InputStream prepareToCheckHeader(InputStream stream) {
|
||||
if (stream instanceof PushbackInputStream) {
|
||||
return stream;
|
||||
}
|
||||
if (stream.markSupported()) {
|
||||
stream.mark(8);
|
||||
return stream;
|
||||
}
|
||||
return new PushbackInputStream(stream, 8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens the specified stream as a secure zip
|
||||
@ -153,7 +222,12 @@ public final class ZipHelper {
|
||||
* @return The zip stream freshly open.
|
||||
*/
|
||||
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
|
||||
InputStream zis = new ZipInputStream(stream);
|
||||
// Peek at the first few bytes to sanity check
|
||||
InputStream checkedStream = prepareToCheckHeader(stream);
|
||||
verifyZipHeader(checkedStream);
|
||||
|
||||
// Open as a proper zip stream
|
||||
InputStream zis = new ZipInputStream(checkedStream);
|
||||
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis);
|
||||
return tis;
|
||||
}
|
||||
@ -170,7 +244,13 @@ public final class ZipHelper {
|
||||
if (!file.exists()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Peek at the first few bytes to sanity check
|
||||
FileInputStream input = new FileInputStream(file);
|
||||
verifyZipHeader(input);
|
||||
input.close();
|
||||
|
||||
// Open as a proper zip file
|
||||
return new ZipSecureFile(file);
|
||||
}
|
||||
|
||||
|
@ -51,6 +51,7 @@ import org.apache.poi.POIXMLException;
|
||||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
||||
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
|
||||
@ -679,14 +680,38 @@ public final class TestPackage {
|
||||
POIDataSamples files = POIDataSamples.getSpreadSheetInstance();
|
||||
|
||||
// OLE2 - Stream
|
||||
// try {
|
||||
// OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
|
||||
// fail("Shouldn't be able to open OLE2");
|
||||
// } catch (OLE2NotOfficeXmlFileException e) {
|
||||
// // TODO Check details
|
||||
// }
|
||||
|
||||
try {
|
||||
OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
|
||||
fail("Shouldn't be able to open OLE2");
|
||||
} catch (OLE2NotOfficeXmlFileException e) {
|
||||
assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
|
||||
assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
|
||||
}
|
||||
// OLE2 - File
|
||||
try {
|
||||
OPCPackage.open(files.getFile("SampleSS.xls"));
|
||||
fail("Shouldn't be able to open OLE2");
|
||||
} catch (OLE2NotOfficeXmlFileException e) {
|
||||
assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
|
||||
assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
|
||||
}
|
||||
|
||||
// Raw XML - Stream
|
||||
try {
|
||||
OPCPackage.open(files.openResourceAsStream("SampleSS.xml"));
|
||||
fail("Shouldn't be able to open XML");
|
||||
} catch (NotOfficeXmlFileException e) {
|
||||
assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
|
||||
assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
|
||||
}
|
||||
// Raw XML - File
|
||||
try {
|
||||
OPCPackage.open(files.getFile("SampleSS.xml"));
|
||||
fail("Shouldn't be able to open XML");
|
||||
} catch (NotOfficeXmlFileException e) {
|
||||
assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
|
||||
assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
|
||||
}
|
||||
|
||||
// ODF / ODS - Stream
|
||||
try {
|
||||
@ -707,9 +732,6 @@ public final class TestPackage {
|
||||
|
||||
// Plain Text - Stream
|
||||
// Plain Text - File
|
||||
|
||||
// Raw XML - Stream
|
||||
// Raw XML - File
|
||||
}
|
||||
|
||||
@Test(expected=IOException.class)
|
||||
|
Loading…
Reference in New Issue
Block a user