Before opening an OOXML file, check the first few bytes, and give helpful exceptions for common mistakes
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1735069 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
700fe05e34
commit
7112835d90
@ -28,6 +28,7 @@ import java.util.zip.ZipOutputStream;
|
|||||||
|
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
|
||||||
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
|
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
|
||||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
|
||||||
@ -194,6 +195,7 @@ public final class ZipPackage extends Package {
|
|||||||
// At this point, we should have loaded the content type part
|
// At this point, we should have loaded the content type part
|
||||||
if (this.contentTypeManager == null) {
|
if (this.contentTypeManager == null) {
|
||||||
// Is it a different Zip-based format?
|
// Is it a different Zip-based format?
|
||||||
|
int numEntries = 0;
|
||||||
boolean hasMimetype = false;
|
boolean hasMimetype = false;
|
||||||
boolean hasSettingsXML = false;
|
boolean hasSettingsXML = false;
|
||||||
entries = this.zipArchive.getEntries();
|
entries = this.zipArchive.getEntries();
|
||||||
@ -205,12 +207,18 @@ public final class ZipPackage extends Package {
|
|||||||
if (entry.getName().equals("settings.xml")) {
|
if (entry.getName().equals("settings.xml")) {
|
||||||
hasSettingsXML = true;
|
hasSettingsXML = true;
|
||||||
}
|
}
|
||||||
|
numEntries++;
|
||||||
}
|
}
|
||||||
if (hasMimetype && hasSettingsXML) {
|
if (hasMimetype && hasSettingsXML) {
|
||||||
throw new ODFNotOfficeXmlFileException(
|
throw new ODFNotOfficeXmlFileException(
|
||||||
"The supplied data appears to be in ODF (Open Document) Format. " +
|
"The supplied data appears to be in ODF (Open Document) Format. " +
|
||||||
"Formats like these (eg ODS, ODP) are not supported, try Apache ODFToolkit");
|
"Formats like these (eg ODS, ODP) are not supported, try Apache ODFToolkit");
|
||||||
}
|
}
|
||||||
|
if (numEntries == 0) {
|
||||||
|
throw new NotOfficeXmlFileException(
|
||||||
|
"No valid entries or contents found, this is not a valid OOXML " +
|
||||||
|
"(Office Open XML) file");
|
||||||
|
}
|
||||||
|
|
||||||
// Fallback exception
|
// Fallback exception
|
||||||
throw new InvalidFormatException(
|
throw new InvalidFormatException(
|
||||||
|
@ -18,8 +18,10 @@
|
|||||||
package org.apache.poi.openxml4j.opc.internal;
|
package org.apache.poi.openxml4j.opc.internal;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.io.PushbackInputStream;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.util.Enumeration;
|
import java.util.Enumeration;
|
||||||
@ -27,12 +29,18 @@ import java.util.zip.ZipEntry;
|
|||||||
import java.util.zip.ZipFile;
|
import java.util.zip.ZipFile;
|
||||||
import java.util.zip.ZipInputStream;
|
import java.util.zip.ZipInputStream;
|
||||||
|
|
||||||
|
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
|
||||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||||
import org.apache.poi.openxml4j.opc.ZipPackage;
|
import org.apache.poi.openxml4j.opc.ZipPackage;
|
||||||
import org.apache.poi.openxml4j.util.ZipSecureFile;
|
import org.apache.poi.openxml4j.util.ZipSecureFile;
|
||||||
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
|
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
|
||||||
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
|
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
||||||
|
import org.apache.poi.util.IOUtils;
|
||||||
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
|
||||||
public final class ZipHelper {
|
public final class ZipHelper {
|
||||||
/**
|
/**
|
||||||
@ -144,6 +152,67 @@ public final class ZipHelper {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that the given stream starts with a Zip structure.
|
||||||
|
*
|
||||||
|
* Warning - this will consume the first few bytes of the stream,
|
||||||
|
* you should push-back or reset the stream after use!
|
||||||
|
*/
|
||||||
|
public static void verifyZipHeader(InputStream stream)
|
||||||
|
throws NotOfficeXmlFileException, IOException {
|
||||||
|
// Grab the first 8 bytes
|
||||||
|
byte[] data = new byte[8];
|
||||||
|
IOUtils.readFully(stream, data);
|
||||||
|
|
||||||
|
// OLE2?
|
||||||
|
long signature = LittleEndian.getLong(data);
|
||||||
|
if (signature == HeaderBlockConstants._signature) {
|
||||||
|
throw new OLE2NotOfficeXmlFileException(
|
||||||
|
"The supplied data appears to be in the OLE2 Format. " +
|
||||||
|
"You are calling the part of POI that deals with OOXML "+
|
||||||
|
"(Office Open XML) Documents. You need to call a different " +
|
||||||
|
"part of POI to process this data (eg HSSF instead of XSSF)");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Raw XML?
|
||||||
|
byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
|
||||||
|
if (data[0] == RAW_XML_FILE_HEADER[0] &&
|
||||||
|
data[1] == RAW_XML_FILE_HEADER[1] &&
|
||||||
|
data[2] == RAW_XML_FILE_HEADER[2] &&
|
||||||
|
data[3] == RAW_XML_FILE_HEADER[3] &&
|
||||||
|
data[4] == RAW_XML_FILE_HEADER[4]) {
|
||||||
|
throw new NotOfficeXmlFileException(
|
||||||
|
"The supplied data appears to be a raw XML file. " +
|
||||||
|
"Formats such as Office 2003 XML are not supported");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't check for a Zip header, as to maintain backwards
|
||||||
|
// compatibility we need to let them seek over junk at the
|
||||||
|
// start before beginning processing.
|
||||||
|
|
||||||
|
// Put things back
|
||||||
|
if (stream instanceof PushbackInputStream) {
|
||||||
|
((PushbackInputStream)stream).unread(data);
|
||||||
|
} else if (stream.markSupported()) {
|
||||||
|
stream.reset();
|
||||||
|
} else if (stream instanceof FileInputStream) {
|
||||||
|
// File open check, about to be closed, nothing to do
|
||||||
|
} else {
|
||||||
|
// Oh dear... I hope you know what you're doing!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static InputStream prepareToCheckHeader(InputStream stream) {
|
||||||
|
if (stream instanceof PushbackInputStream) {
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
if (stream.markSupported()) {
|
||||||
|
stream.mark(8);
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
return new PushbackInputStream(stream, 8);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Opens the specified stream as a secure zip
|
* Opens the specified stream as a secure zip
|
||||||
@ -153,7 +222,12 @@ public final class ZipHelper {
|
|||||||
* @return The zip stream freshly open.
|
* @return The zip stream freshly open.
|
||||||
*/
|
*/
|
||||||
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
|
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
|
||||||
InputStream zis = new ZipInputStream(stream);
|
// Peek at the first few bytes to sanity check
|
||||||
|
InputStream checkedStream = prepareToCheckHeader(stream);
|
||||||
|
verifyZipHeader(checkedStream);
|
||||||
|
|
||||||
|
// Open as a proper zip stream
|
||||||
|
InputStream zis = new ZipInputStream(checkedStream);
|
||||||
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis);
|
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis);
|
||||||
return tis;
|
return tis;
|
||||||
}
|
}
|
||||||
@ -170,7 +244,13 @@ public final class ZipHelper {
|
|||||||
if (!file.exists()) {
|
if (!file.exists()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Peek at the first few bytes to sanity check
|
||||||
|
FileInputStream input = new FileInputStream(file);
|
||||||
|
verifyZipHeader(input);
|
||||||
|
input.close();
|
||||||
|
|
||||||
|
// Open as a proper zip file
|
||||||
return new ZipSecureFile(file);
|
return new ZipSecureFile(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,6 +51,7 @@ import org.apache.poi.POIXMLException;
|
|||||||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
|
||||||
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
|
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
|
||||||
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
|
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
|
||||||
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
|
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
|
||||||
@ -679,14 +680,38 @@ public final class TestPackage {
|
|||||||
POIDataSamples files = POIDataSamples.getSpreadSheetInstance();
|
POIDataSamples files = POIDataSamples.getSpreadSheetInstance();
|
||||||
|
|
||||||
// OLE2 - Stream
|
// OLE2 - Stream
|
||||||
// try {
|
try {
|
||||||
// OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
|
OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
|
||||||
// fail("Shouldn't be able to open OLE2");
|
fail("Shouldn't be able to open OLE2");
|
||||||
// } catch (OLE2NotOfficeXmlFileException e) {
|
} catch (OLE2NotOfficeXmlFileException e) {
|
||||||
// // TODO Check details
|
assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
|
||||||
// }
|
assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
|
||||||
|
}
|
||||||
// OLE2 - File
|
// OLE2 - File
|
||||||
|
try {
|
||||||
|
OPCPackage.open(files.getFile("SampleSS.xls"));
|
||||||
|
fail("Shouldn't be able to open OLE2");
|
||||||
|
} catch (OLE2NotOfficeXmlFileException e) {
|
||||||
|
assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
|
||||||
|
assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Raw XML - Stream
|
||||||
|
try {
|
||||||
|
OPCPackage.open(files.openResourceAsStream("SampleSS.xml"));
|
||||||
|
fail("Shouldn't be able to open XML");
|
||||||
|
} catch (NotOfficeXmlFileException e) {
|
||||||
|
assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
|
||||||
|
assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
|
||||||
|
}
|
||||||
|
// Raw XML - File
|
||||||
|
try {
|
||||||
|
OPCPackage.open(files.getFile("SampleSS.xml"));
|
||||||
|
fail("Shouldn't be able to open XML");
|
||||||
|
} catch (NotOfficeXmlFileException e) {
|
||||||
|
assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
|
||||||
|
assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
|
||||||
|
}
|
||||||
|
|
||||||
// ODF / ODS - Stream
|
// ODF / ODS - Stream
|
||||||
try {
|
try {
|
||||||
@ -707,9 +732,6 @@ public final class TestPackage {
|
|||||||
|
|
||||||
// Plain Text - Stream
|
// Plain Text - Stream
|
||||||
// Plain Text - File
|
// Plain Text - File
|
||||||
|
|
||||||
// Raw XML - Stream
|
|
||||||
// Raw XML - File
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected=IOException.class)
|
@Test(expected=IOException.class)
|
||||||
|
Loading…
Reference in New Issue
Block a user