#61381 - PushbackInputStreams passed to ZipHelper may not hold 8 bytes

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1804854 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2017-08-11 20:47:48 +00:00
parent 9ae6952845
commit 6f341568b6
17 changed files with 455 additions and 423 deletions

View File

@ -17,22 +17,22 @@
package org.apache.poi.poifs.filesystem; package org.apache.poi.poifs.filesystem;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.util.IOUtils;
import java.io.FilterInputStream; import java.io.FilterInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.security.GeneralSecurityException; import java.security.GeneralSecurityException;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
/** /**
* A small base class for the various factories, e.g. WorkbookFactory, * A small base class for the various factories, e.g. WorkbookFactory,
* SlideShowFactory to combine common code here. * SlideShowFactory to combine common code here.
*/ */
@Internal
public class DocumentFactoryHelper { public class DocumentFactoryHelper {
/** /**
* Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using * Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using
@ -81,36 +81,19 @@ public class DocumentFactoryHelper {
/** /**
* Checks that the supplied InputStream (which MUST * Checks that the supplied InputStream (which MUST
* support mark and reset, or be a PushbackInputStream) * support mark and reset) has a OOXML (zip) header at the start of it.<p>
* has a OOXML (zip) header at the start of it. *
* If your InputStream does not support mark / reset, * If unsure if your InputStream does support mark / reset,
* then wrap it in a PushBackInputStream, then be * use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
* sure to always use that, and not the original! * sure to always use that, and not the original!
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream *
* @param inp An InputStream which supports mark/reset
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == FileMagic.OOXML instead
*/ */
@Deprecated
@Removal(version="4.0")
public static boolean hasOOXMLHeader(InputStream inp) throws IOException { public static boolean hasOOXMLHeader(InputStream inp) throws IOException {
// We want to peek at the first 4 bytes return FileMagic.valueOf(inp) == FileMagic.OOXML;
inp.mark(4);
byte[] header = new byte[4];
int bytesRead = IOUtils.readFully(inp, header);
// Wind back those 4 bytes
if(inp instanceof PushbackInputStream) {
PushbackInputStream pin = (PushbackInputStream)inp;
pin.unread(header, 0, bytesRead);
} else {
inp.reset();
} }
// Did it match the ooxml zip signature?
return (
bytesRead == 4 &&
header[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
header[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
header[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
header[3] == POIFSConstants.OOXML_FILE_HEADER[3]
);
}
} }

View File

@ -0,0 +1,155 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.filesystem;
import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER;
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LocaleUtil;
/**
 * The file magic number, i.e. the file identification based on the first bytes
 * of the file.<p>
 *
 * Each constant carries one or more alternative byte patterns. Detection walks the
 * constants in declaration order and returns the first constant for which any of
 * its patterns matches the start of the data.
 */
public enum FileMagic {
    /** OLE2 / BIFF8+ stream used for Office 97 and higher documents */
    OLE2(HeaderBlockConstants._signature),
    /** OOXML / ZIP stream */
    OOXML(OOXML_FILE_HEADER),
    /** XML file */
    XML(RAW_XML_FILE_HEADER),
    /** BIFF2 raw stream - for Excel 2 */
    BIFF2(new byte[]{
        0x09, 0x00, // sid=0x0009
        0x04, 0x00, // size=0x0004
        0x00, 0x00, // unused
        0x70, 0x00  // 0x70 = multiple values
    }),
    /** BIFF3 raw stream - for Excel 3 */
    BIFF3(new byte[]{
        0x09, 0x02, // sid=0x0209
        0x06, 0x00, // size=0x0006
        0x00, 0x00, // unused
        0x70, 0x00  // 0x70 = multiple values
    }),
    /** BIFF4 raw stream - for Excel 4 */
    BIFF4(new byte[]{
        0x09, 0x04, // sid=0x0409
        0x06, 0x00, // size=0x0006
        0x00, 0x00, // unused
        0x70, 0x00  // 0x70 = multiple values
    }, new byte[]{
        0x09, 0x04, // sid=0x0409
        0x06, 0x00, // size=0x0006
        0x00, 0x00, // unused
        0x00, 0x01
    }),
    /** Old MS Write raw stream */
    MSWRITE(
        new byte[]{0x31, (byte)0xbe, 0x00, 0x00 },
        new byte[]{0x32, (byte)0xbe, 0x00, 0x00 }),
    /** RTF document */
    RTF("{\\rtf"),
    /** PDF document */
    PDF("%PDF"),
    // keep UNKNOWN always as last enum!
    // Its single empty pattern matches any input, so it acts as the fallback
    // once every other constant has been tried.
    /** UNKNOWN magic */
    UNKNOWN(new byte[0]);

    /** The alternative byte patterns identifying this file type; each is compared from offset 0. */
    final byte[][] magic;

    /**
     * Creates a magic with a single 8 byte pattern.
     *
     * @param magic the signature value, stored via {@link LittleEndian#putLong}
     */
    FileMagic(long magic) {
        this.magic = new byte[1][8];
        LittleEndian.putLong(this.magic[0], 0, magic);
    }

    /**
     * Creates a magic from one or more alternative byte patterns.
     *
     * @param magic the alternative patterns; the arrays are stored as passed, not copied
     */
    FileMagic(byte[]... magic) {
        this.magic = magic;
    }

    /**
     * Creates a magic with a single pattern taken from the CP1252 encoded bytes of a string.
     *
     * @param magic the textual signature
     */
    FileMagic(String magic) {
        this(magic.getBytes(LocaleUtil.CHARSET_1252));
    }

    /**
     * Determines the file magic from the leading bytes of a file / stream.<p>
     *
     * The constants are tried in declaration order and every alternative pattern of a
     * constant is compared independently from the start of the data. Patterns which are
     * longer than the supplied data are skipped.
     *
     * @param magic the first bytes of the data to be identified
     * @return the first matching constant, or {@link #UNKNOWN} if no other pattern matches
     */
    public static FileMagic valueOf(byte[] magic) {
        for (FileMagic fm : values()) {
            for (byte[] pattern : fm.magic) {
                // a pattern which is longer than the available data can't match
                if (magic.length < pattern.length) {
                    continue;
                }
                if (startsWithPattern(pattern, magic)) {
                    return fm;
                }
            }
        }
        return UNKNOWN;
    }

    /**
     * Compares a pattern against the start of the data. A pattern byte of 0x70 is a
     * placeholder which also accepts the data bytes 0x10, 0x20 and 0x40.
     *
     * @param pattern the pattern to look for
     * @param data the data to check, which must be at least as long as the pattern
     * @return {@code true} if every pattern byte is matched by the data byte at the same index
     */
    private static boolean startsWithPattern(byte[] pattern, byte[] data) {
        for (int i = 0; i < pattern.length; i++) {
            byte m = pattern[i];
            byte d = data[i];
            if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Get the file magic of the supplied InputStream (which MUST
     * support mark and reset).<p>
     *
     * If unsure if your InputStream does support mark / reset,
     * use {@link #prepareToCheckMagic(InputStream)} to wrap it and make
     * sure to always use that, and not the original!<p>
     *
     * Even if this method returns {@link FileMagic#UNKNOWN} it could potentially mean,
     * that the ZIP stream has leading junk bytes
     *
     * @param inp An InputStream which supports mark/reset
     * @return the detected file magic, or {@link #UNKNOWN} if no other pattern matches
     * @throws IOException if the stream doesn't support mark, or if peeking at the header fails
     */
    public static FileMagic valueOf(InputStream inp) throws IOException {
        if (!inp.markSupported()) {
            throw new IOException("getFileMagic() only operates on streams which support mark(int)");
        }

        // Grab the first 8 bytes
        // NOTE(review): relies on IOUtils.peekFirst8Bytes to leave the stream position untouched - confirm
        byte[] data = IOUtils.peekFirst8Bytes(inp);

        return FileMagic.valueOf(data);
    }

    /**
     * Checks if an {@link InputStream} can be reset (i.e. used for checking the header magic)
     * and wraps it if not.
     *
     * @param stream stream to be checked for wrapping
     * @return the given stream if it supports mark, otherwise a {@link BufferedInputStream} around it
     */
    public static InputStream prepareToCheckMagic(InputStream stream) {
        if (stream.markSupported()) {
            return stream;
        }
        // we used to process the data via a PushbackInputStream, but user code could provide a too small one
        // so we use a BufferedInputStream instead now
        return new BufferedInputStream(stream);
    }
}

View File

@ -19,6 +19,7 @@
package org.apache.poi.poifs.filesystem; package org.apache.poi.poifs.filesystem;
import java.io.ByteArrayInputStream;
import java.io.Closeable; import java.io.Closeable;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
@ -26,7 +27,6 @@ import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.channels.Channels; import java.nio.channels.Channels;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
@ -51,14 +51,13 @@ import org.apache.poi.poifs.storage.BATBlock.BATBlockAndIndex;
import org.apache.poi.poifs.storage.BlockAllocationTableReader; import org.apache.poi.poifs.storage.BlockAllocationTableReader;
import org.apache.poi.poifs.storage.BlockAllocationTableWriter; import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
import org.apache.poi.poifs.storage.HeaderBlock; import org.apache.poi.poifs.storage.HeaderBlock;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.poifs.storage.HeaderBlockWriter; import org.apache.poi.poifs.storage.HeaderBlockWriter;
import org.apache.poi.util.CloseIgnoringInputStream; import org.apache.poi.util.CloseIgnoringInputStream;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LongField;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger; import org.apache.poi.util.POILogger;
import org.apache.poi.util.Removal;
/** /**
* <p>This is the main class of the POIFS system; it manages the entire * <p>This is the main class of the POIFS system; it manages the entire
@ -353,44 +352,38 @@ public class NPOIFSFileSystem extends BlockStore
/** /**
* Checks that the supplied InputStream (which MUST * Checks that the supplied InputStream (which MUST
* support mark and reset, or be a PushbackInputStream) * support mark and reset) has a POIFS (OLE2) header at the start of it.
* has a POIFS (OLE2) header at the start of it. * If unsure if your InputStream does support mark / reset,
* If your InputStream does not support mark / reset, * use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
* then wrap it in a PushBackInputStream, then be * sure to always use that, and not the original!
* sure to always use that and not the original!
* *
* After the method call, the InputStream is at the * After the method call, the InputStream is at the
* same position as of the time of entering the method. * same position as of the time of entering the method.
* *
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream * @param inp An InputStream which supports mark/reset
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
*/ */
@Deprecated
@Removal(version="4.0")
public static boolean hasPOIFSHeader(InputStream inp) throws IOException { public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
// We want to peek at the first 8 bytes return FileMagic.valueOf(inp) == FileMagic.OLE2;
inp.mark(8);
byte[] header = new byte[8];
int bytesRead = IOUtils.readFully(inp, header);
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header);
// Wind back those 8 bytes
if(inp instanceof PushbackInputStream) {
PushbackInputStream pin = (PushbackInputStream)inp;
pin.unread(header, 0, bytesRead);
} else {
inp.reset();
}
// Did it match the signature?
return (signature.get() == HeaderBlockConstants._signature);
} }
/** /**
* Checks if the supplied first 8 bytes of a stream / file * Checks if the supplied first 8 bytes of a stream / file
* has a POIFS (OLE2) header. * has a POIFS (OLE2) header.
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
*/ */
@Deprecated
@Removal(version="4.0")
public static boolean hasPOIFSHeader(byte[] header8Bytes) { public static boolean hasPOIFSHeader(byte[] header8Bytes) {
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes); try {
return (signature.get() == HeaderBlockConstants._signature); return hasPOIFSHeader(new ByteArrayInputStream(header8Bytes));
} catch (IOException e) {
throw new RuntimeException("invalid header check", e);
}
} }
/** /**

View File

@ -42,16 +42,14 @@ import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
import org.apache.poi.poifs.storage.BlockList; import org.apache.poi.poifs.storage.BlockList;
import org.apache.poi.poifs.storage.BlockWritable; import org.apache.poi.poifs.storage.BlockWritable;
import org.apache.poi.poifs.storage.HeaderBlock; import org.apache.poi.poifs.storage.HeaderBlock;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.poifs.storage.HeaderBlockWriter; import org.apache.poi.poifs.storage.HeaderBlockWriter;
import org.apache.poi.poifs.storage.RawDataBlockList; import org.apache.poi.poifs.storage.RawDataBlockList;
import org.apache.poi.poifs.storage.SmallBlockTableReader; import org.apache.poi.poifs.storage.SmallBlockTableReader;
import org.apache.poi.poifs.storage.SmallBlockTableWriter; import org.apache.poi.poifs.storage.SmallBlockTableWriter;
import org.apache.poi.util.CloseIgnoringInputStream; import org.apache.poi.util.CloseIgnoringInputStream;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LongField;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger; import org.apache.poi.util.POILogger;
import org.apache.poi.util.Removal;
/** /**
* <p>This is the main class of the POIFS system; it manages the entire * <p>This is the main class of the POIFS system; it manages the entire
@ -200,27 +198,34 @@ public class OPOIFSFileSystem
/** /**
* Checks that the supplied InputStream (which MUST * Checks that the supplied InputStream (which MUST
* support mark and reset, or be a PushbackInputStream) * support mark and reset) has a POIFS (OLE2) header at the start of it.
* has a POIFS (OLE2) header at the start of it. * If unsure if your InputStream does support mark / reset,
* If your InputStream does not support mark / reset, * use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
* then wrap it in a PushBackInputStream, then be
* sure to always use that, and not the original! * sure to always use that, and not the original!
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream *
* After the method call, the InputStream is at the
* same position as of the time of entering the method.
*
* @param inp An InputStream which supports mark/reset
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
*/ */
@Deprecated
@Removal(version="4.0")
public static boolean hasPOIFSHeader(InputStream inp) throws IOException { public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
// We want to peek at the first 8 bytes return NPOIFSFileSystem.hasPOIFSHeader(inp);
byte[] header = IOUtils.peekFirst8Bytes(inp);
return hasPOIFSHeader(header);
} }
/** /**
* Checks if the supplied first 8 bytes of a stream / file * Checks if the supplied first 8 bytes of a stream / file
* has a POIFS (OLE2) header. * has a POIFS (OLE2) header.
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
*/ */
@Deprecated
@Removal(version="4.0")
public static boolean hasPOIFSHeader(byte[] header8Bytes) { public static boolean hasPOIFSHeader(byte[] header8Bytes) {
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes); return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes);
// Did it match the signature?
return (signature.get() == HeaderBlockConstants._signature);
} }
/** /**

View File

@ -114,27 +114,6 @@ public class POIFSFileSystem
super(file); super(file);
} }
/**
* Checks that the supplied InputStream (which MUST
* support mark and reset, or be a PushbackInputStream)
* has a POIFS (OLE2) header at the start of it.
* If your InputStream does not support mark / reset,
* then wrap it in a PushBackInputStream, then be
* sure to always use that, and not the original!
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
*/
public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
return NPOIFSFileSystem.hasPOIFSHeader(inp);
}
/**
* Checks if the supplied first 8 bytes of a stream / file
* has a POIFS (OLE2) header.
*/
public static boolean hasPOIFSHeader(byte[] header8Bytes) {
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes);
}
/** /**
* Creates a new {@link POIFSFileSystem} in a new {@link File}. * Creates a new {@link POIFSFileSystem} in a new {@link File}.
* Use {@link #POIFSFileSystem(File)} to open an existing File, * Use {@link #POIFSFileSystem(File)} to open an existing File,

View File

@ -17,8 +17,8 @@
package org.apache.poi.poifs.macros; package org.apache.poi.poifs.macros;
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
@ -27,7 +27,6 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
@ -38,6 +37,7 @@ import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode; import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.CodePageUtil; import org.apache.poi.util.CodePageUtil;
@ -67,13 +67,12 @@ public class VBAMacroReader implements Closeable {
private NPOIFSFileSystem fs; private NPOIFSFileSystem fs;
public VBAMacroReader(InputStream rstream) throws IOException { public VBAMacroReader(InputStream rstream) throws IOException {
PushbackInputStream stream = new PushbackInputStream(rstream, 8); InputStream is = FileMagic.prepareToCheckMagic(rstream);
byte[] header8 = IOUtils.peekFirst8Bytes(stream); FileMagic fm = FileMagic.valueOf(is);
if (fm == FileMagic.OLE2) {
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { fs = new NPOIFSFileSystem(is);
fs = new NPOIFSFileSystem(stream);
} else { } else {
openOOXML(stream); openOOXML(is);
} }
} }

View File

@ -26,6 +26,7 @@ import java.util.Arrays;
import org.apache.poi.hssf.OldExcelFormatException; import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.poifs.common.POIFSBigBlockSize; import org.apache.poi.poifs.common.POIFSBigBlockSize;
import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NotOLE2FileException; import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.HexDump; import org.apache.poi.util.HexDump;
@ -40,41 +41,6 @@ import org.apache.poi.util.ShortField;
* The block containing the archive header * The block containing the archive header
*/ */
public final class HeaderBlock implements HeaderBlockConstants { public final class HeaderBlock implements HeaderBlockConstants {
private static final byte[] MAGIC_BIFF2 = {
0x09, 0x00, // sid=0x0009
0x04, 0x00, // size=0x0004
0x00, 0x00, // unused
0x70, 0x00 // 0x70 = multiple values
};
private static final byte[] MAGIC_BIFF3 = {
0x09, 0x02, // sid=0x0209
0x06, 0x00, // size=0x0006
0x00, 0x00, // unused
0x70, 0x00 // 0x70 = multiple values
};
private static final byte[] MAGIC_BIFF4a = {
0x09, 0x04, // sid=0x0409
0x06, 0x00, // size=0x0006
0x00, 0x00, // unused
0x70, 0x00 // 0x70 = multiple values
};
private static final byte[] MAGIC_BIFF4b = {
0x09, 0x04, // sid=0x0409
0x06, 0x00, // size=0x0006
0x00, 0x00, // unused
0x00, 0x01
};
private static final byte[] MAGIC_MSWRITEa = {
0x31, (byte)0xbe, 0x00, 0x00
};
private static final byte[] MAGIC_MSWRITEb = {
0x32, (byte)0xbe, 0x00, 0x00
};
private static final byte _default_value = ( byte ) 0xFF; private static final byte _default_value = ( byte ) 0xFF;
/** /**
@ -151,53 +117,35 @@ public final class HeaderBlock implements HeaderBlockConstants {
this._data = data.clone(); this._data = data.clone();
// verify signature // verify signature
long signature = LittleEndian.getLong(_data, _signature_offset); FileMagic fm = FileMagic.valueOf(data);
if (signature != _signature) { switch (fm) {
// Is it one of the usual suspects? case OLE2:
if (cmp(POIFSConstants.OOXML_FILE_HEADER, data)) { break;
case OOXML:
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. " throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
+ "You are calling the part of POI that deals with OLE2 Office Documents. " + "You are calling the part of POI that deals with OLE2 Office Documents. "
+ "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
} case XML:
if (cmp(POIFSConstants.RAW_XML_FILE_HEADER, data)) {
throw new NotOLE2FileException("The supplied data appears to be a raw XML file. " throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
+ "Formats such as Office 2003 XML are not supported"); + "Formats such as Office 2003 XML are not supported");
} case MSWRITE:
// Old MS Write raw stream
if (cmp(MAGIC_MSWRITEa, data) || cmp(MAGIC_MSWRITEb, data)) {
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. " throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
+ "Apache POI doesn't currently support this format"); + "Apache POI doesn't currently support this format");
} case BIFF2:
case BIFF3:
// BIFF2 raw stream case BIFF4:
if (cmp(MAGIC_BIFF2, data)) { throw new OldExcelFormatException("The supplied data appears to be in "+fm+" format. "
throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. "
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor"); + "HSSF only supports the BIFF8 format, try OldExcelExtractor");
} default:
// BIFF3 raw stream
if (cmp(MAGIC_BIFF3, data)) {
throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. "
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
}
// BIFF4 raw stream
if (cmp(MAGIC_BIFF4a, data) || cmp(MAGIC_BIFF4b, data)) {
throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. "
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
}
// Give a generic error if the OLE2 signature isn't found // Give a generic error if the OLE2 signature isn't found
throw new NotOLE2FileException("Invalid header signature; read " String exp = HexDump.longToHex(_signature);
+ HexDump.longToHex(signature) + ", expected " String act = HexDump.longToHex(LittleEndian.getLong(data, 0));
+ HexDump.longToHex(_signature) + " - Your file appears " throw new NotOLE2FileException(
+ "not to be a valid OLE2 document"); "Invalid header signature; read " + act + ", expected " + exp +
" - Your file appears not to be a valid OLE2 document");
} }
// Figure out our block size // Figure out our block size
if (_data[30] == 12) { if (_data[30] == 12) {
this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS; this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS;
@ -434,15 +382,4 @@ public final class HeaderBlock implements HeaderBlockConstants {
stream.write(0); stream.write(0);
} }
} }
private static boolean cmp(byte[] magic, byte[] data) {
int i=0;
for (byte m : magic) {
byte d = data[i++];
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
return false;
}
}
return true;
}
} }

View File

@ -20,7 +20,6 @@ import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method; import java.lang.reflect.Method;
@ -30,6 +29,7 @@ import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
@ -94,9 +94,7 @@ public class SlideShowFactory {
* Creates the appropriate HSLFSlideShow / XMLSlideShow from * Creates the appropriate HSLFSlideShow / XMLSlideShow from
* the given InputStream. * the given InputStream.
* *
* <p>Your input stream MUST either support mark/reset, or * <p>Note that using an {@link InputStream} has a higher memory footprint
* be wrapped as a {@link PushbackInputStream}! Note that
* using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.</p> * than using a {@link File}.</p>
* *
* <p>Note that in order to properly release resources the * <p>Note that in order to properly release resources the
@ -118,9 +116,8 @@ public class SlideShowFactory {
/** /**
* Creates the appropriate HSLFSlideShow / XMLSlideShow from * Creates the appropriate HSLFSlideShow / XMLSlideShow from
* the given InputStream, which may be password protected. * the given InputStream, which may be password protected.
* <p>Your input stream MUST either support mark/reset, or *
* be wrapped as a {@link PushbackInputStream}! Note that * <p>Note that using an {@link InputStream} has a higher memory footprint
* using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.</p> * than using a {@link File}.</p>
* *
* <p>Note that in order to properly release resources the * <p>Note that in order to properly release resources the
@ -137,24 +134,19 @@ public class SlideShowFactory {
* @throws EncryptedDocumentException If the wrong password is given for a protected file * @throws EncryptedDocumentException If the wrong password is given for a protected file
*/ */
public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException { public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException {
// If clearly doesn't do mark/reset, wrap up InputStream is = FileMagic.prepareToCheckMagic(inp);
if (! inp.markSupported()) { FileMagic fm = FileMagic.valueOf(is);
inp = new PushbackInputStream(inp, 8);
}
// Ensure that there is at least some data there switch (fm) {
byte[] header8 = IOUtils.peekFirst8Bytes(inp); case OLE2:
NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
// Try to create
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
return create(fs, password); return create(fs, password);
} case OOXML:
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { return createXSLFSlideShow(is);
return createXSLFSlideShow(inp); default:
}
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
} }
}
/** /**
* Creates the appropriate HSLFSlideShow / XMLSlideShow from * Creates the appropriate HSLFSlideShow / XMLSlideShow from

View File

@ -21,7 +21,6 @@ import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
@ -45,8 +44,8 @@ import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo; import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.NotOLE2FileException; import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
@ -175,22 +174,21 @@ public class ExtractorFactory {
} }
public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
// Figure out the kind of stream InputStream is = FileMagic.prepareToCheckMagic(inp);
// If clearly doesn't do mark/reset, wrap up
if (! inp.markSupported()) {
inp = new PushbackInputStream(inp, 8);
}
if (NPOIFSFileSystem.hasPOIFSHeader(inp)) { FileMagic fm = FileMagic.valueOf(is);
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
switch (fm) {
case OLE2:
NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs); return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs);
} case OOXML:
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { return createExtractor(OPCPackage.open(is));
return createExtractor(OPCPackage.open(inp)); default:
}
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
} }
}
/** /**
* Tries to determine the actual type of file and produces a matching text-extractor for it. * Tries to determine the actual type of file and produces a matching text-extractor for it.

View File

@ -22,7 +22,6 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.Enumeration; import java.util.Enumeration;
@ -38,12 +37,11 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.ZipPackage; import org.apache.poi.openxml4j.opc.ZipPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile; import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream; import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.storage.HeaderBlockConstants; import org.apache.poi.util.Internal;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.Removal; import org.apache.poi.util.Removal;
@Internal
public final class ZipHelper { public final class ZipHelper {
/** /**
* Forward slash use to convert part name between OPC and zip item naming * Forward slash use to convert part name between OPC and zip item naming
@ -172,61 +170,31 @@ public final class ZipHelper {
* Warning - this will consume the first few bytes of the stream, * Warning - this will consume the first few bytes of the stream,
* you should push-back or reset the stream after use! * you should push-back or reset the stream after use!
*/ */
public static void verifyZipHeader(InputStream stream) public static void verifyZipHeader(InputStream stream) throws NotOfficeXmlFileException, IOException {
throws NotOfficeXmlFileException, IOException { InputStream is = FileMagic.prepareToCheckMagic(stream);
// Grab the first 8 bytes FileMagic fm = FileMagic.valueOf(is);
byte[] data = new byte[8];
IOUtils.readFully(stream, data);
// OLE2? switch (fm) {
long signature = LittleEndian.getLong(data); case OLE2:
if (signature == HeaderBlockConstants._signature) {
throw new OLE2NotOfficeXmlFileException( throw new OLE2NotOfficeXmlFileException(
"The supplied data appears to be in the OLE2 Format. " + "The supplied data appears to be in the OLE2 Format. " +
"You are calling the part of POI that deals with OOXML "+ "You are calling the part of POI that deals with OOXML "+
"(Office Open XML) Documents. You need to call a different " + "(Office Open XML) Documents. You need to call a different " +
"part of POI to process this data (eg HSSF instead of XSSF)"); "part of POI to process this data (eg HSSF instead of XSSF)");
} case XML:
// Raw XML?
byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
if (data[0] == RAW_XML_FILE_HEADER[0] &&
data[1] == RAW_XML_FILE_HEADER[1] &&
data[2] == RAW_XML_FILE_HEADER[2] &&
data[3] == RAW_XML_FILE_HEADER[3] &&
data[4] == RAW_XML_FILE_HEADER[4]) {
throw new NotOfficeXmlFileException( throw new NotOfficeXmlFileException(
"The supplied data appears to be a raw XML file. " + "The supplied data appears to be a raw XML file. " +
"Formats such as Office 2003 XML are not supported"); "Formats such as Office 2003 XML are not supported");
} default:
case OOXML:
case UNKNOWN:
// Don't check for a Zip header, as to maintain backwards // Don't check for a Zip header, as to maintain backwards
// compatibility we need to let them seek over junk at the // compatibility we need to let them seek over junk at the
// start before beginning processing. // start before beginning processing.
break;
// Put things back
if (stream instanceof PushbackInputStream) {
((PushbackInputStream)stream).unread(data);
} else if (stream.markSupported()) {
stream.reset();
} else if (stream instanceof FileInputStream) {
// File open check, about to be closed, nothing to do
} else {
// Oh dear... I hope you know what you're doing!
} }
} }
private static InputStream prepareToCheckHeader(InputStream stream) {
if (stream instanceof PushbackInputStream) {
return stream;
}
if (stream.markSupported()) {
stream.mark(8);
return stream;
}
return new PushbackInputStream(stream, 8);
}
/** /**
* Opens the specified stream as a secure zip * Opens the specified stream as a secure zip
* *
@ -237,7 +205,7 @@ public final class ZipHelper {
@SuppressWarnings("resource") @SuppressWarnings("resource")
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
// Peek at the first few bytes to sanity check // Peek at the first few bytes to sanity check
InputStream checkedStream = prepareToCheckHeader(stream); InputStream checkedStream = FileMagic.prepareToCheckMagic(stream);
verifyZipHeader(checkedStream); verifyZipHeader(checkedStream);
// Open as a proper zip stream // Open as a proper zip stream

View File

@ -198,10 +198,11 @@ public class ZipSecureFile extends ZipFile {
public static class ThresholdInputStream extends PushbackInputStream { public static class ThresholdInputStream extends PushbackInputStream {
long counter = 0; long counter = 0;
long markPos = 0;
ThresholdInputStream cis; ThresholdInputStream cis;
public ThresholdInputStream(InputStream is, ThresholdInputStream cis) { public ThresholdInputStream(InputStream is, ThresholdInputStream cis) {
super(is,1); super(is);
this.cis = cis; this.cis = cis;
} }
@ -225,14 +226,15 @@ public class ZipSecureFile extends ZipFile {
@Override @Override
public long skip(long n) throws IOException { public long skip(long n) throws IOException {
counter = 0; long s = in.skip(n);
return in.skip(n); counter += s;
return s;
} }
@Override @Override
public synchronized void reset() throws IOException { public synchronized void reset() throws IOException {
counter = 0; counter = markPos;
in.reset(); super.reset();
} }
public void advance(int advance) throws IOException { public void advance(int advance) throws IOException {
@ -263,10 +265,10 @@ public class ZipSecureFile extends ZipFile {
} }
// one of the limits was reached, report it // one of the limits was reached, report it
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data. " throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data.\n"
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk. " + "This may indicate that the file is used to inflate memory usage and thus could pose a security risk.\n"
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit. " + "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit.\n"
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + (((double)cis.counter)/counter) + "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + ratio + "\n"
+ "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO); + "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO);
} }
@ -322,6 +324,7 @@ public class ZipSecureFile extends ZipFile {
@Override @Override
public synchronized void mark(int readlimit) { public synchronized void mark(int readlimit) {
markPos = counter;
in.mark(readlimit); in.mark(readlimit);
} }
} }

View File

@ -16,11 +16,11 @@
==================================================================== */ ==================================================================== */
package org.apache.poi.ss.usermodel; package org.apache.poi.ss.usermodel;
import java.io.BufferedInputStream;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import org.apache.poi.EmptyFileException; import org.apache.poi.EmptyFileException;
import org.apache.poi.EncryptedDocumentException; import org.apache.poi.EncryptedDocumentException;
@ -32,6 +32,7 @@ import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@ -127,7 +128,7 @@ public class WorkbookFactory {
* the given InputStream. * the given InputStream.
* *
* <p>Your input stream MUST either support mark/reset, or * <p>Your input stream MUST either support mark/reset, or
* be wrapped as a {@link PushbackInputStream}! Note that * be wrapped as a {@link BufferedInputStream}! Note that
* using an {@link InputStream} has a higher memory footprint * using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.</p> * than using a {@link File}.</p>
* *
@ -150,16 +151,15 @@ public class WorkbookFactory {
/** /**
* Creates the appropriate HSSFWorkbook / XSSFWorkbook from * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
* the given InputStream, which may be password protected. * the given InputStream, which may be password protected.<p>
* <p>Your input stream MUST either support mark/reset, or
* be wrapped as a {@link PushbackInputStream}! Note that
* using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.</p>
* *
* <p>Note that in order to properly release resources the * Note that using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.<p>
*
* Note that in order to properly release resources the
* Workbook should be closed after use. Note also that loading * Workbook should be closed after use. Note also that loading
* from an InputStream requires more memory than loading * from an InputStream requires more memory than loading
* from a File, so prefer {@link #create(File)} where possible.</p> * from a File, so prefer {@link #create(File)} where possible.
* *
* @param inp The {@link InputStream} to read data from. * @param inp The {@link InputStream} to read data from.
* @param password The password that should be used or null if no password is necessary. * @param password The password that should be used or null if no password is necessary.
@ -172,24 +172,20 @@ public class WorkbookFactory {
* @throws EmptyFileException If an empty stream is given * @throws EmptyFileException If an empty stream is given
*/ */
public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException {
// If clearly doesn't do mark/reset, wrap up InputStream is = FileMagic.prepareToCheckMagic(inp);
if (! inp.markSupported()) {
inp = new PushbackInputStream(inp, 8);
}
// Ensure that there is at least some data there FileMagic fm = FileMagic.valueOf(is);
byte[] header8 = IOUtils.peekFirst8Bytes(inp);
// Try to create switch (fm) {
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { case OLE2:
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
return create(fs, password); return create(fs, password);
} case OOXML:
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { return new XSSFWorkbook(OPCPackage.open(is));
return new XSSFWorkbook(OPCPackage.open(inp)); default:
}
throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
} }
}
/** /**
* Creates the appropriate HSSFWorkbook / XSSFWorkbook from * Creates the appropriate HSSFWorkbook / XSSFWorkbook from

View File

@ -20,7 +20,6 @@ package org.apache.poi.xssf.usermodel;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import javax.xml.namespace.QName; import javax.xml.namespace.QName;
@ -29,7 +28,7 @@ import org.apache.poi.POIXMLException;
import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.ObjectData; import org.apache.poi.ss.usermodel.ObjectData;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
@ -161,17 +160,8 @@ public class XSSFObjectData extends XSSFSimpleShape implements ObjectData {
InputStream is = null; InputStream is = null;
try { try {
is = getObjectPart().getInputStream(); is = getObjectPart().getInputStream();
is = FileMagic.prepareToCheckMagic(is);
// If clearly doesn't do mark/reset, wrap up return FileMagic.valueOf(is) == FileMagic.OLE2;
if (! is.markSupported()) {
is = new PushbackInputStream(is, 8);
}
// Ensure that there is at least some data there
byte[] header8 = IOUtils.peekFirst8Bytes(is);
// Try to create
return NPOIFSFileSystem.hasPOIFSHeader(header8);
} catch (IOException e) { } catch (IOException e) {
LOG.log(POILogger.WARN, "can't determine if directory entry exists", e); LOG.log(POILogger.WARN, "can't determine if directory entry exists", e);
return false; return false;

View File

@ -19,68 +19,70 @@
package org.apache.poi; package org.apache.poi;
import java.io.ByteArrayInputStream; import static org.junit.Assert.assertArrayEquals;
import java.io.InputStream; import static org.junit.Assert.assertEquals;
import java.io.PushbackInputStream; import static org.junit.Assert.assertFalse;
import java.util.Arrays; import static org.junit.Assert.assertTrue;
import junit.framework.TestCase; import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.util.IOUtils;
import org.junit.Test;
/** /**
* Class to test that HXF correctly detects OOXML * Class to test that HXF correctly detects OOXML
* documents * documents
*/ */
public class TestDetectAsOOXML extends TestCase public class TestDetectAsOOXML {
{ @Test
public void testOpensProperly() throws Exception public void testOpensProperly() throws IOException, InvalidFormatException {
{
OPCPackage.open(HSSFTestDataSamples.openSampleFileStream("sample.xlsx")); OPCPackage.open(HSSFTestDataSamples.openSampleFileStream("sample.xlsx"));
} }
public void testDetectAsPOIFS() throws Exception { @Test
InputStream in; public void testDetectAsPOIFS() throws IOException {
Object fileAndMagic[][] = {
{ "SampleSS.xlsx", FileMagic.OOXML },
{ "SampleSS.xls", FileMagic.OLE2 },
{ "SampleSS.txt", FileMagic.UNKNOWN }
};
// ooxml file is for (Object fm[] : fileAndMagic) {
in = new PushbackInputStream( InputStream is = HSSFTestDataSamples.openSampleFileStream((String)fm[0]);
HSSFTestDataSamples.openSampleFileStream("SampleSS.xlsx"), 10 is = FileMagic.prepareToCheckMagic(is);
); FileMagic act = FileMagic.valueOf(is);
assertTrue(DocumentFactoryHelper.hasOOXMLHeader(in));
in.close();
// xls file isn't if (act == FileMagic.OOXML) {
in = new PushbackInputStream( assertTrue(DocumentFactoryHelper.hasOOXMLHeader(is));
HSSFTestDataSamples.openSampleFileStream("SampleSS.xls"), 10
);
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
in.close();
// text file isn't
in = new PushbackInputStream(
HSSFTestDataSamples.openSampleFileStream("SampleSS.txt"), 10
);
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
in.close();
} }
assertEquals("file magic failed for "+fm[0], fm[1], act);
is.close();
}
}
@Test
public void testFileCorruption() throws Exception { public void testFileCorruption() throws Exception {
// create test InputStream // create test InputStream
byte[] testData = { (byte)1, (byte)2, (byte)3 }; byte[] testData = { 1, 2, 3 };
ByteArrayInputStream testInput = new ByteArrayInputStream(testData); ByteArrayInputStream testInput = new ByteArrayInputStream(testData);
InputStream is = FileMagic.prepareToCheckMagic(testInput);
// detect header // detect header
InputStream in = new PushbackInputStream(testInput, 10); assertFalse(DocumentFactoryHelper.hasOOXMLHeader(is));
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
// check if InputStream is still intact // check if InputStream is still intact
byte[] test = new byte[3]; byte[] act = IOUtils.toByteArray(is);
assertEquals(3, in.read(test)); assertArrayEquals(testData, act);
assertTrue(Arrays.equals(testData, test)); assertEquals(-1, is.read());
assertEquals(-1, in.read()); is.close();
in.close();
} }
} }

View File

@ -17,29 +17,23 @@
package org.apache.poi.openxml4j.opc; package org.apache.poi.openxml4j.opc;
import org.apache.poi.*; import static org.junit.Assert.assertEquals;
import org.apache.poi.extractor.ExtractorFactory; import static org.junit.Assert.assertFalse;
import org.apache.poi.hssf.HSSFTestDataSamples; import static org.junit.Assert.assertNotNull;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; import static org.junit.Assert.assertNull;
import org.apache.poi.openxml4j.exceptions.*; import static org.junit.Assert.assertTrue;
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager; import static org.junit.Assert.fail;
import org.apache.poi.openxml4j.opc.internal.FileHelper;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
import org.apache.poi.openxml4j.opc.internal.ZipHelper;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.util.*;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.xmlbeans.XmlException;
import org.junit.Ignore;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import java.io.*; import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
@ -52,7 +46,41 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipFile; import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream; import java.util.zip.ZipOutputStream;
import static org.junit.Assert.*; import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDataSamples;
import org.apache.poi.POITestCase;
import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLException;
import org.apache.poi.UnsupportedFileFormatException;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
import org.apache.poi.openxml4j.opc.internal.FileHelper;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
import org.apache.poi.openxml4j.opc.internal.ZipHelper;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.util.DocumentHelper;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.TempFile;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.xmlbeans.XmlException;
import org.junit.Ignore;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public final class TestPackage { public final class TestPackage {
private static final POILogger logger = POILogFactory.getLogger(TestPackage.class); private static final POILogger logger = POILogFactory.getLogger(TestPackage.class);
@ -947,20 +975,32 @@ public final class TestPackage {
} }
// bug 60128 // bug 60128
@Test @Test(expected=NotOfficeXmlFileException.class)
public void testCorruptFile() throws IOException, InvalidFormatException { public void testCorruptFile() throws IOException, InvalidFormatException {
OPCPackage pkg = null;
File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx"); File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx");
OPCPackage.open(file, PackageAccess.READ);
}
// bug 61381
@Test
public void testTooShortFilterStreams() throws IOException, InvalidFormatException {
File xssf = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx");
File hssf = POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls");
InputStream isList[] = {
new PushbackInputStream(new FileInputStream(xssf), 2),
new BufferedInputStream(new FileInputStream(xssf), 2),
new PushbackInputStream(new FileInputStream(hssf), 2),
new BufferedInputStream(new FileInputStream(hssf), 2),
};
try { try {
pkg = OPCPackage.open(file, PackageAccess.READ); for (InputStream is : isList) {
} catch (NotOfficeXmlFileException e) { WorkbookFactory.create(is).close();
/*System.out.println(e.getClass().getName()); }
System.out.println(e.getMessage());
e.printStackTrace();*/
// ignore exception
} finally { } finally {
if (pkg != null) { for (InputStream is : isList) {
pkg.close(); IOUtils.closeQuietly(is);
} }
} }
} }

View File

@ -20,7 +20,6 @@ package org.apache.poi.hwpf;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.security.GeneralSecurityException; import java.security.GeneralSecurityException;
import org.apache.poi.EncryptedDocumentException; import org.apache.poi.EncryptedDocumentException;
@ -47,6 +46,7 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.BoundedInputStream; import org.apache.poi.util.BoundedInputStream;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
@ -116,22 +116,14 @@ public abstract class HWPFDocumentCore extends POIDocument {
* POIFSFileSystem from it, and returns that. * POIFSFileSystem from it, and returns that.
*/ */
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
// Open a PushbackInputStream, so we can peek at the first few bytes InputStream is = FileMagic.prepareToCheckMagic(istream);
PushbackInputStream pis = new PushbackInputStream(istream,6); FileMagic fm = FileMagic.valueOf(is);
byte[] first6 = IOUtils.toByteArray(pis, 6);
// Does it start with {\rtf ? If so, it's really RTF if (fm != FileMagic.OLE2) {
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' throw new IllegalArgumentException("The document is really a "+fm+" file");
&& first6[3] == 't' && first6[4] == 'f') {
throw new IllegalArgumentException("The document is really a RTF file");
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
throw new IllegalArgumentException("The document is really a PDF file");
} }
// OK, so it's neither RTF nor PDF return new POIFSFileSystem(is);
// Open a POIFSFileSystem on the (pushed back) stream
pis.unread(first6);
return new POIFSFileSystem(pis);
} }
/** /**

View File

@ -22,7 +22,6 @@ import static org.apache.poi.POITestCase.assertContains;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Arrays; import java.util.Arrays;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
@ -86,8 +85,9 @@ public class TestOfficeXMLException extends TestCase {
// text file isn't // text file isn't
confirmIsPOIFS("SampleSS.txt", false); confirmIsPOIFS("SampleSS.txt", false);
} }
private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException { private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException {
InputStream in = new PushbackInputStream(openSampleStream(sampleFileName), 10); InputStream in = FileMagic.prepareToCheckMagic(openSampleStream(sampleFileName));
try { try {
boolean actualResult; boolean actualResult;
try { try {
@ -108,7 +108,7 @@ public class TestOfficeXMLException extends TestCase {
InputStream testInput = new ByteArrayInputStream(testData); InputStream testInput = new ByteArrayInputStream(testData);
// detect header // detect header
InputStream in = new PushbackInputStream(testInput, 10); InputStream in = FileMagic.prepareToCheckMagic(testInput);
assertFalse(POIFSFileSystem.hasPOIFSHeader(in)); assertFalse(POIFSFileSystem.hasPOIFSHeader(in));
// check if InputStream is still intact // check if InputStream is still intact
@ -126,7 +126,7 @@ public class TestOfficeXMLException extends TestCase {
InputStream testInput = new ByteArrayInputStream(testData); InputStream testInput = new ByteArrayInputStream(testData);
// detect header // detect header
InputStream in = new PushbackInputStream(testInput, 10); InputStream in = FileMagic.prepareToCheckMagic(testInput);
assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in)); assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in));
// check if InputStream is still intact // check if InputStream is still intact