Before opening an OOXML file, check the first few bytes, and give helpful exceptions for common mistakes

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1735069 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2016-03-15 12:30:45 +00:00
parent 700fe05e34
commit 7112835d90
3 changed files with 121 additions and 11 deletions

View File

@ -28,6 +28,7 @@ import java.util.zip.ZipOutputStream;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
@ -194,6 +195,7 @@ public final class ZipPackage extends Package {
// At this point, we should have loaded the content type part
if (this.contentTypeManager == null) {
// Is it a different Zip-based format?
int numEntries = 0;
boolean hasMimetype = false;
boolean hasSettingsXML = false;
entries = this.zipArchive.getEntries();
@ -205,12 +207,18 @@ public final class ZipPackage extends Package {
if (entry.getName().equals("settings.xml")) {
hasSettingsXML = true;
}
numEntries++;
}
if (hasMimetype && hasSettingsXML) {
throw new ODFNotOfficeXmlFileException(
"The supplied data appears to be in ODF (Open Document) Format. " +
"Formats like these (eg ODS, ODP) are not supported, try Apache ODFToolkit");
}
if (numEntries == 0) {
throw new NotOfficeXmlFileException(
"No valid entries or contents found, this is not a valid OOXML " +
"(Office Open XML) file");
}
// Fallback exception
throw new InvalidFormatException(

View File

@ -18,8 +18,10 @@
package org.apache.poi.openxml4j.opc.internal;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Enumeration;
@ -27,12 +29,18 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.ZipPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
public final class ZipHelper {
/**
@ -144,6 +152,67 @@ public final class ZipHelper {
return null;
}
}
/**
* Verifies that the given stream starts with a Zip structure.
*
* Warning - this will consume the first few bytes of the stream,
* you should push-back or reset the stream after use!
*/
public static void verifyZipHeader(InputStream stream)
throws NotOfficeXmlFileException, IOException {
// Grab the first 8 bytes
byte[] data = new byte[8];
IOUtils.readFully(stream, data);
// OLE2?
long signature = LittleEndian.getLong(data);
if (signature == HeaderBlockConstants._signature) {
throw new OLE2NotOfficeXmlFileException(
"The supplied data appears to be in the OLE2 Format. " +
"You are calling the part of POI that deals with OOXML "+
"(Office Open XML) Documents. You need to call a different " +
"part of POI to process this data (eg HSSF instead of XSSF)");
}
// Raw XML?
byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
if (data[0] == RAW_XML_FILE_HEADER[0] &&
data[1] == RAW_XML_FILE_HEADER[1] &&
data[2] == RAW_XML_FILE_HEADER[2] &&
data[3] == RAW_XML_FILE_HEADER[3] &&
data[4] == RAW_XML_FILE_HEADER[4]) {
throw new NotOfficeXmlFileException(
"The supplied data appears to be a raw XML file. " +
"Formats such as Office 2003 XML are not supported");
}
// Don't check for a Zip header, as to maintain backwards
// compatibility we need to let them seek over junk at the
// start before beginning processing.
// Put things back
if (stream instanceof PushbackInputStream) {
((PushbackInputStream)stream).unread(data);
} else if (stream.markSupported()) {
stream.reset();
} else if (stream instanceof FileInputStream) {
// File open check, about to be closed, nothing to do
} else {
// Oh dear... I hope you know what you're doing!
}
}
private static InputStream prepareToCheckHeader(InputStream stream) {
if (stream instanceof PushbackInputStream) {
return stream;
}
if (stream.markSupported()) {
stream.mark(8);
return stream;
}
return new PushbackInputStream(stream, 8);
}
/**
* Opens the specified stream as a secure zip
@ -153,7 +222,12 @@ public final class ZipHelper {
* @return The zip stream freshly open.
*/
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
InputStream zis = new ZipInputStream(stream);
// Peek at the first few bytes to sanity check
InputStream checkedStream = prepareToCheckHeader(stream);
verifyZipHeader(checkedStream);
// Open as a proper zip stream
InputStream zis = new ZipInputStream(checkedStream);
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis);
return tis;
}
@ -170,7 +244,13 @@ public final class ZipHelper {
if (!file.exists()) {
return null;
}
// Peek at the first few bytes to sanity check
FileInputStream input = new FileInputStream(file);
verifyZipHeader(input);
input.close();
// Open as a proper zip file
return new ZipSecureFile(file);
}

View File

@ -51,6 +51,7 @@ import org.apache.poi.POIXMLException;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
@ -679,14 +680,38 @@ public final class TestPackage {
POIDataSamples files = POIDataSamples.getSpreadSheetInstance();
// OLE2 - Stream
// try {
// OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
// fail("Shouldn't be able to open OLE2");
// } catch (OLE2NotOfficeXmlFileException e) {
// // TODO Check details
// }
try {
OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
fail("Shouldn't be able to open OLE2");
} catch (OLE2NotOfficeXmlFileException e) {
assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
}
// OLE2 - File
try {
OPCPackage.open(files.getFile("SampleSS.xls"));
fail("Shouldn't be able to open OLE2");
} catch (OLE2NotOfficeXmlFileException e) {
assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1);
assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1);
}
// Raw XML - Stream
try {
OPCPackage.open(files.openResourceAsStream("SampleSS.xml"));
fail("Shouldn't be able to open XML");
} catch (NotOfficeXmlFileException e) {
assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
}
// Raw XML - File
try {
OPCPackage.open(files.getFile("SampleSS.xml"));
fail("Shouldn't be able to open XML");
} catch (NotOfficeXmlFileException e) {
assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1);
assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1);
}
// ODF / ODS - Stream
try {
@ -707,9 +732,6 @@ public final class TestPackage {
// Plain Text - Stream
// Plain Text - File
// Raw XML - Stream
// Raw XML - File
}
@Test(expected=IOException.class)