diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java index 549d2e576..c5ac56391 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java @@ -28,6 +28,7 @@ import java.util.zip.ZipOutputStream; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; +import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException; import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException; @@ -194,6 +195,7 @@ public final class ZipPackage extends Package { // At this point, we should have loaded the content type part if (this.contentTypeManager == null) { // Is it a different Zip-based format? + int numEntries = 0; boolean hasMimetype = false; boolean hasSettingsXML = false; entries = this.zipArchive.getEntries(); @@ -205,12 +207,18 @@ public final class ZipPackage extends Package { if (entry.getName().equals("settings.xml")) { hasSettingsXML = true; } + numEntries++; } if (hasMimetype && hasSettingsXML) { throw new ODFNotOfficeXmlFileException( "The supplied data appears to be in ODF (Open Document) Format. " + "Formats like these (eg ODS, ODP) are not supported, try Apache ODFToolkit"); } + if (numEntries == 0) { + throw new NotOfficeXmlFileException( + "No valid entries or contents found, this is not a valid OOXML " + + "(Office Open XML) file"); + } // Fallback exception throw new InvalidFormatException( diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java index 2a536dc7b..786a69f0c 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java @@ -18,8 +18,10 @@ package org.apache.poi.openxml4j.opc.internal; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.PushbackInputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.Enumeration; @@ -27,12 +29,18 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import java.util.zip.ZipInputStream; +import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException; +import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.PackageRelationship; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.ZipPackage; import org.apache.poi.openxml4j.util.ZipSecureFile; import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream; +import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.poifs.storage.HeaderBlockConstants; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.LittleEndian; public final class ZipHelper { /** @@ -144,6 +152,67 @@ public final class ZipHelper { return null; } } + + /** + * Verifies that the given stream starts with a Zip structure. + * + * Warning - this will consume the first few bytes of the stream, + * you should push-back or reset the stream after use! + */ + public static void verifyZipHeader(InputStream stream) + throws NotOfficeXmlFileException, IOException { + // Grab the first 8 bytes + byte[] data = new byte[8]; + IOUtils.readFully(stream, data); + + // OLE2? + long signature = LittleEndian.getLong(data); + if (signature == HeaderBlockConstants._signature) { + throw new OLE2NotOfficeXmlFileException( + "The supplied data appears to be in the OLE2 Format. " + + "You are calling the part of POI that deals with OOXML "+ + "(Office Open XML) Documents. You need to call a different " + + "part of POI to process this data (eg HSSF instead of XSSF)"); + } + + // Raw XML? + byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER; + if (data[0] == RAW_XML_FILE_HEADER[0] && + data[1] == RAW_XML_FILE_HEADER[1] && + data[2] == RAW_XML_FILE_HEADER[2] && + data[3] == RAW_XML_FILE_HEADER[3] && + data[4] == RAW_XML_FILE_HEADER[4]) { + throw new NotOfficeXmlFileException( + "The supplied data appears to be a raw XML file. " + + "Formats such as Office 2003 XML are not supported"); + } + + // Don't check for a Zip header, as to maintain backwards + // compatibility we need to let them seek over junk at the + // start before beginning processing. + + // Put things back + if (stream instanceof PushbackInputStream) { + ((PushbackInputStream)stream).unread(data); + } else if (stream.markSupported()) { + stream.reset(); + } else if (stream instanceof FileInputStream) { + // File open check, about to be closed, nothing to do + } else { + // Oh dear... I hope you know what you're doing! + } + } + + private static InputStream prepareToCheckHeader(InputStream stream) { + if (stream instanceof PushbackInputStream) { + return stream; + } + if (stream.markSupported()) { + stream.mark(8); + return stream; + } + return new PushbackInputStream(stream, 8); + } /** * Opens the specified stream as a secure zip @@ -153,7 +222,12 @@ public final class ZipHelper { * @return The zip stream freshly open. */ public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { - InputStream zis = new ZipInputStream(stream); + // Peek at the first few bytes to sanity check + InputStream checkedStream = prepareToCheckHeader(stream); + verifyZipHeader(checkedStream); + + // Open as a proper zip stream + InputStream zis = new ZipInputStream(checkedStream); ThresholdInputStream tis = ZipSecureFile.addThreshold(zis); return tis; } @@ -170,7 +244,13 @@ public final class ZipHelper { if (!file.exists()) { return null; } + + // Peek at the first few bytes to sanity check + FileInputStream input = new FileInputStream(file); + verifyZipHeader(input); + input.close(); + // Open as a proper zip file return new ZipSecureFile(file); } diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java index 7a113d728..9cde41486 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java @@ -51,6 +51,7 @@ import org.apache.poi.POIXMLException; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; +import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException; import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException; import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException; import org.apache.poi.openxml4j.opc.internal.ContentTypeManager; @@ -679,14 +680,38 @@ public final class TestPackage { POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); // OLE2 - Stream -// try { -// OPCPackage.open(files.openResourceAsStream("SampleSS.xls")); -// fail("Shouldn't be able to open OLE2"); -// } catch (OLE2NotOfficeXmlFileException e) { -// // TODO Check details -// } - + try { + OPCPackage.open(files.openResourceAsStream("SampleSS.xls")); + fail("Shouldn't be able to open OLE2"); + } catch (OLE2NotOfficeXmlFileException e) { + assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1); + assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1); + } // OLE2 - File + try { + OPCPackage.open(files.getFile("SampleSS.xls")); + fail("Shouldn't be able to open OLE2"); + } catch (OLE2NotOfficeXmlFileException e) { + assertTrue(e.getMessage().indexOf("The supplied data appears to be in the OLE2 Format") > -1); + assertTrue(e.getMessage().indexOf("You are calling the part of POI that deals with OOXML") > -1); + } + + // Raw XML - Stream + try { + OPCPackage.open(files.openResourceAsStream("SampleSS.xml")); + fail("Shouldn't be able to open XML"); + } catch (NotOfficeXmlFileException e) { + assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1); + assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1); + } + // Raw XML - File + try { + OPCPackage.open(files.getFile("SampleSS.xml")); + fail("Shouldn't be able to open XML"); + } catch (NotOfficeXmlFileException e) { + assertTrue(e.getMessage().indexOf("The supplied data appears to be a raw XML file") > -1); + assertTrue(e.getMessage().indexOf("Formats such as Office 2003 XML") > -1); + } // ODF / ODS - Stream try { @@ -707,9 +732,6 @@ public final class TestPackage { // Plain Text - Stream // Plain Text - File - - // Raw XML - Stream - // Raw XML - File } @Test(expected=IOException.class)