From 18a4647b24585dbe0c1d21ab342a0fa47ee5f472 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 16 Jan 2008 12:46:43 +0000 Subject: [PATCH] Add methods to check to see if a given InputStream has an OOXML file header, or an OLE2 file header, so that a future factory method could figure out which class to instantiate for a given InputStream git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@612438 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/poifs/common/POIFSConstants.java | 3 + .../poi/poifs/filesystem/POIFSFileSystem.java | 33 ++++++++++ .../poi/poifs/storage/HeaderBlockReader.java | 7 +- .../org/apache/poi/hxf/HXFDocument.java | 39 +++++++++++ .../org/apache/poi/hxf/TestDetectAsOOXML.java | 65 +++++++++++++++++++ .../TestOffice2007XMLException.java | 22 +++++++ 6 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java diff --git a/src/java/org/apache/poi/poifs/common/POIFSConstants.java b/src/java/org/apache/poi/poifs/common/POIFSConstants.java index bc1bf6dad..399f52be4 100644 --- a/src/java/org/apache/poi/poifs/common/POIFSConstants.java +++ b/src/java/org/apache/poi/poifs/common/POIFSConstants.java @@ -31,4 +31,7 @@ public interface POIFSConstants public static final int END_OF_CHAIN = -2; public static final int PROPERTY_SIZE = 0x0080; public static final int UNUSED_BLOCK = -1; + + public static final byte[] OOXML_FILE_HEADER = + new byte[] { 0x50, 0x4b, 0x03, 0x04 }; } // end public interface POIFSConstants; diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java index 981c51d39..3d4f1aac6 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java @@ -34,6 +34,7 @@ import org.apache.poi.poifs.storage.BlockAllocationTableReader; import org.apache.poi.poifs.storage.BlockAllocationTableWriter; import 
org.apache.poi.poifs.storage.BlockList; import org.apache.poi.poifs.storage.BlockWritable; +import org.apache.poi.poifs.storage.HeaderBlockConstants; import org.apache.poi.poifs.storage.HeaderBlockReader; import org.apache.poi.poifs.storage.HeaderBlockWriter; import org.apache.poi.poifs.storage.RawDataBlock; @@ -41,6 +42,9 @@ import org.apache.poi.poifs.storage.RawDataBlockList; import org.apache.poi.poifs.storage.SmallBlockTableReader; import org.apache.poi.poifs.storage.SmallBlockTableWriter; import org.apache.poi.poifs.storage.SmallDocumentBlock; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.LongField; +import org.apache.xmlbeans.impl.common.IOUtil; /** * This is the main class of the POIFS system; it manages the entire @@ -106,6 +110,35 @@ public class POIFSFileSystem .getSBATStart()), data_blocks, properties.getRoot() .getChildren(), null); } + + /** + * Checks that the supplied InputStream (which MUST + * support mark and reset, or be a PushbackInputStream) + * has a POIFS (OLE2) header at the start of it. + * If your InputStream does not support mark / reset, + * then wrap it in a PushBackInputStream, then be + * sure to always use that, and not the original! + * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream + */ + public static boolean hasPOIFSHeader(InputStream inp) throws IOException { + // We want to peek at the first 8 bytes + inp.mark(8); + + byte[] header = new byte[8]; + IOUtils.readFully(inp, header); + LongField signature = new LongField(HeaderBlockConstants._signature_offset, header); + + // Wind back those 8 bytes + if(inp instanceof PushbackInputStream) { + PushbackInputStream pin = (PushbackInputStream)inp; + pin.unread(header); + } else { + inp.reset(); + } + + // Did it match the signature? 
+ return (signature.get() == HeaderBlockConstants._signature); + } /** * Create a new document to be added to the root directory diff --git a/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java b/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java index 16c94e2c2..0d5bb817b 100644 --- a/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java +++ b/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java @@ -91,8 +91,11 @@ public class HeaderBlockReader if (signature.get() != _signature) { // Is it one of the usual suspects? - if(_data[0] == 0x50 && _data[1] == 0x4b && _data[2] == 0x03 && - _data[3] == 0x04) { + byte[] OOXML_FILE_HEADER = POIFSConstants.OOXML_FILE_HEADER; + if(_data[0] == OOXML_FILE_HEADER[0] && + _data[1] == OOXML_FILE_HEADER[1] && + _data[2] == OOXML_FILE_HEADER[2] && + _data[3] == OOXML_FILE_HEADER[3]) { throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. POI only supports OLE2 Office documents"); } diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java index c2b2aa6d9..9849a7d21 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java @@ -18,9 +18,15 @@ package org.apache.poi.hxf; import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; import java.util.ArrayList; import org.apache.poi.POIXMLDocument; +import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.poifs.storage.HeaderBlockConstants; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.LongField; import org.apache.xmlbeans.XmlException; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -87,6 +93,39 @@ public abstract class HXFDocument { } } + /** + * Checks that the supplied InputStream (which MUST + * support mark and reset, or be a PushbackInputStream) + 
* has a OOXML (zip) header at the start of it. + * If your InputStream does not support mark / reset, + * then wrap it in a PushBackInputStream, then be + * sure to always use that, and not the original! + * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream + */ + public static boolean hasOOXMLHeader(InputStream inp) throws IOException { + // We want to peek at the first 4 bytes + inp.mark(4); + + byte[] header = new byte[4]; + IOUtils.readFully(inp, header); + + // Wind back those 4 bytes + if(inp instanceof PushbackInputStream) { + PushbackInputStream pin = (PushbackInputStream)inp; + pin.unread(header); + } else { + inp.reset(); + } + + // Did it match the ooxml zip signature? + return ( + header[0] == POIFSConstants.OOXML_FILE_HEADER[0] && + header[1] == POIFSConstants.OOXML_FILE_HEADER[1] && + header[2] == POIFSConstants.OOXML_FILE_HEADER[2] && + header[3] == POIFSConstants.OOXML_FILE_HEADER[3] + ); + } + /** * Fetches the (single) PackagePart with the supplied * content type. diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java new file mode 100644 index 000000000..36adb497c --- /dev/null +++ b/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java @@ -0,0 +1,65 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hxf; + +import junit.framework.TestCase; +import java.io.*; + +/** + * Class to test that HXF correctly detects OOXML + * documents + */ +public class TestDetectAsOOXML extends TestCase +{ + public String dirname; + + public void setUp() { + dirname = System.getProperty("HSSF.testdata.path"); + } + + public void testOpensProperly() throws Exception + { + File f = new File(dirname + "/sample.xlsx"); + + HXFDocument.openPackage(f); + } + + public void testDetectAsPOIFS() throws Exception { + InputStream in; + + // ooxml file is + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.xlsx"), 10 + ); + assertTrue(HXFDocument.hasOOXMLHeader(in)); + + // xls file isn't + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.xls"), 10 + ); + assertFalse(HXFDocument.hasOOXMLHeader(in)); + + // text file isn't + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.txt"), 10 + ); + assertFalse(HXFDocument.hasOOXMLHeader(in)); + } +} diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestOffice2007XMLException.java b/src/testcases/org/apache/poi/poifs/filesystem/TestOffice2007XMLException.java index db8607d37..01a0f42d3 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/TestOffice2007XMLException.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestOffice2007XMLException.java @@ -47,4 +47,26 @@ public class TestOffice2007XMLException extends TestCase // Good } } + + public void testDetectAsPOIFS() 
throws IOException { + InputStream in; + + // ooxml file isn't + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.xlsx"), 10 + ); + assertFalse(POIFSFileSystem.hasPOIFSHeader(in)); + + // xls file is + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.xls"), 10 + ); + assertTrue(POIFSFileSystem.hasPOIFSHeader(in)); + + // text file isn't + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.txt"), 10 + ); + assertFalse(POIFSFileSystem.hasPOIFSHeader(in)); + } }