diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 0891579ed..bd533988e 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 48085 - improved error checking in BlockAllocationTableReader to trap unreasonable field values 47924 - fixed logic for matching cells and comments in HSSFCell.getCellComment() 47942 - added implementation of protection features to XLSX and DOCX files 48070 - preserve leading and trailing white spaces in XSSFRichTextString diff --git a/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java b/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java index 0fb3b4899..31a260939 100644 --- a/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java +++ b/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java @@ -42,7 +42,20 @@ import org.apache.poi.util.LittleEndianConsts; * @author Marc Johnson (mjohnson at apache dot org) */ public final class BlockAllocationTableReader { - private IntList _entries; + + /** + * Maximum number size (in blocks) of the allocation table as supported by + * POI.
+ * + * This constant has been chosen to help POI identify corrupted data in the + * header block (rather than crash immediately with {@link OutOfMemoryError} + * ). It's not clear if the compound document format actually specifies any + * upper limits. For files with 512 byte blocks, having an allocation table + * of 65,335 blocks would correspond to a total file size of 4GB. Needless + * to say, POI probably cannot handle files anywhere near that size. + */ + private static final int MAX_BLOCK_COUNT = 65535; + private final IntList _entries; /** * create a BlockAllocationTableReader for an existing filesystem. Side @@ -62,22 +75,20 @@ public final class BlockAllocationTableReader { * @exception IOException if, in trying to create the table, we * encounter logic errors */ - - public BlockAllocationTableReader(final int block_count, - final int [] block_array, - final int xbat_count, - final int xbat_index, - final BlockList raw_block_list) - throws IOException - { + public BlockAllocationTableReader(int block_count, int [] block_array, + int xbat_count, int xbat_index, BlockList raw_block_list) throws IOException { this(); - if (block_count <= 0) - { + if (block_count <= 0) { throw new IOException( "Illegal block count; minimum count is 1, got " + block_count + " instead"); } + if (block_count > MAX_BLOCK_COUNT) { + throw new IOException("Block count " + block_count + + " is too high. POI maximum is " + MAX_BLOCK_COUNT + "."); + } + // acquire raw data blocks containing the BAT block data RawDataBlock blocks[] = new RawDataBlock[ block_count ]; int limit = Math.min(block_count, block_array.length); @@ -141,17 +152,13 @@ public final class BlockAllocationTableReader { * * @exception IOException */ - - BlockAllocationTableReader(final ListManagedBlock [] blocks, - final BlockList raw_block_list) - throws IOException - { + BlockAllocationTableReader(ListManagedBlock[] blocks, BlockList raw_block_list) + throws IOException { this(); setEntries(blocks, raw_block_list); } - BlockAllocationTableReader() - { + BlockAllocationTableReader() { _entries = new IntList(); } @@ -167,11 +174,8 @@ public final class BlockAllocationTableReader { * * @exception IOException if there is a problem acquiring the blocks */ - ListManagedBlock [] fetchBlocks(final int startBlock, - final int headerPropertiesStartBlock, - final BlockList blockList) - throws IOException - { + ListManagedBlock[] fetchBlocks(int startBlock, int headerPropertiesStartBlock, + BlockList blockList) throws IOException { List blocks = new ArrayList(); int currentBlock = startBlock; boolean firstPass = true; @@ -182,28 +186,28 @@ public final class BlockAllocationTableReader { // Sometimes we have data, header, end // For those cases, stop at the header, not the end while (currentBlock != POIFSConstants.END_OF_CHAIN) { - try { - // Grab the data at the current block offset - dataBlock = blockList.remove(currentBlock); - blocks.add(dataBlock); - // Now figure out which block we go to next + try { + // Grab the data at the current block offset + dataBlock = blockList.remove(currentBlock); + blocks.add(dataBlock); + // Now figure out which block we go to next currentBlock = _entries.get(currentBlock); firstPass = false; - } catch(IOException e) { - if(currentBlock == headerPropertiesStartBlock) { - // Special case where things are in the wrong order - System.err.println("Warning, header block comes after data blocks in POIFS block listing"); - currentBlock = POIFSConstants.END_OF_CHAIN; - } else if(currentBlock == 0 && firstPass) { - // Special case where the termination isn't done right - // on an empty set - System.err.println("Warning, incorrectly terminated empty data blocks in POIFS block listing (should end at -2, ended at 0)"); - currentBlock = POIFSConstants.END_OF_CHAIN; - } else { - // Ripple up - throw e; - } - } + } catch(IOException e) { + if(currentBlock == headerPropertiesStartBlock) { + // Special case where things are in the wrong order + System.err.println("Warning, header block comes after data blocks in POIFS block listing"); + currentBlock = POIFSConstants.END_OF_CHAIN; + } else if(currentBlock == 0 && firstPass) { + // Special case where the termination isn't done right + // on an empty set + System.err.println("Warning, incorrectly terminated empty data blocks in POIFS block listing (should end at -2, ended at 0)"); + currentBlock = POIFSConstants.END_OF_CHAIN; + } else { + // Ripple up + throw e; + } + } } return blocks.toArray(new ListManagedBlock[blocks.size()]); @@ -218,17 +222,14 @@ public final class BlockAllocationTableReader { * * @return true if the specific block is used, else false */ - boolean isUsed(final int index) - { - boolean rval = false; + boolean isUsed(int index) { - try - { - rval = _entries.get(index) != -1; + try { + return _entries.get(index) != -1; } catch (IndexOutOfBoundsException e) { // ignored + return false; } - return rval; } /** @@ -242,11 +243,8 @@ public final class BlockAllocationTableReader { * * @exception IOException if the current block is unused */ - int getNextBlockIndex(final int index) - throws IOException - { - if (isUsed(index)) - { + int getNextBlockIndex(int index) throws IOException { + if (isUsed(index)) { return _entries.get(index); } throw new IOException("index " + index + " is unused"); @@ -259,10 +257,7 @@ public final class BlockAllocationTableReader { * @param raw_blocks the list of blocks being managed. Unused * blocks will be eliminated from the list */ - private void setEntries(final ListManagedBlock [] blocks, - final BlockList raw_blocks) - throws IOException - { + private void setEntries(ListManagedBlock[] blocks, BlockList raw_blocks) throws IOException { int limit = BATBlock.entriesPerBlock(); for (int block_index = 0; block_index < blocks.length; block_index++) diff --git a/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java b/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java index f0ff068ec..1cd5dd690 100644 --- a/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java +++ b/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java @@ -47,25 +47,25 @@ public final class HeaderBlockReader { * What big block size the file uses. Most files * use 512 bytes, but a few use 4096 */ - private int bigBlockSize = POIFSConstants.BIG_BLOCK_SIZE; + private final int bigBlockSize; /** number of big block allocation table blocks (int) */ - private int _bat_count; + private final int _bat_count; /** start of the property set block (int index of the property set * chain's first big block) */ - private int _property_start; + private final int _property_start; /** start of the small block allocation table (int index of small * block allocation table's first big block) */ - private int _sbat_start; + private final int _sbat_start; /** big block index for extension to the big block allocation table */ - private int _xbat_start; - private int _xbat_count; - private byte[] _data; + private final int _xbat_start; + private final int _xbat_count; + private final byte[] _data; /** * create a new HeaderBlockReader from an InputStream @@ -82,32 +82,19 @@ public final class HeaderBlockReader { byte[] blockStart = new byte[32]; int bsCount = IOUtils.readFully(stream, blockStart); if(bsCount != 32) { - alertShortRead(bsCount); - } - - // Figure out our block size - if(blockStart[30] == 12) { - bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE; - } - _data = new byte[ bigBlockSize ]; - System.arraycopy(blockStart, 0, _data, 0, blockStart.length); - - // Now we can read the rest of our header - int byte_count = IOUtils.readFully(stream, _data, blockStart.length, _data.length - blockStart.length); - if (byte_count+bsCount != bigBlockSize) { - alertShortRead(byte_count); + throw alertShortRead(bsCount, 32); } // verify signature - long signature = LittleEndian.getLong(_data, _signature_offset); + long signature = LittleEndian.getLong(blockStart, _signature_offset); if (signature != _signature) { // Is it one of the usual suspects? byte[] OOXML_FILE_HEADER = POIFSConstants.OOXML_FILE_HEADER; - if(_data[0] == OOXML_FILE_HEADER[0] && - _data[1] == OOXML_FILE_HEADER[1] && - _data[2] == OOXML_FILE_HEADER[2] && - _data[3] == OOXML_FILE_HEADER[3]) { + if(blockStart[0] == OOXML_FILE_HEADER[0] && + blockStart[1] == OOXML_FILE_HEADER[1] && + blockStart[2] == OOXML_FILE_HEADER[2] && + blockStart[3] == OOXML_FILE_HEADER[3]) { throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. You are calling the part of POI that deals with OLE2 Office Documents. You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); } if ((signature & 0xFF8FFFFFFFFFFFFFL) == 0x0010000200040009L) { @@ -121,13 +108,34 @@ public final class HeaderBlockReader { + longToHex(signature) + ", expected " + longToHex(_signature)); } + + + // Figure out our block size + switch (blockStart[30]) { + case 12: + bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE; break; + case 9: + bigBlockSize = POIFSConstants.BIG_BLOCK_SIZE; break; + default: + throw new IOException("Unsupported blocksize (2^" + + blockStart[30] + "). Expected 2^9 or 2^12."); + } + _data = new byte[ bigBlockSize ]; + System.arraycopy(blockStart, 0, _data, 0, blockStart.length); + + // Now we can read the rest of our header + int byte_count = IOUtils.readFully(stream, _data, blockStart.length, _data.length - blockStart.length); + if (byte_count+bsCount != bigBlockSize) { + throw alertShortRead(byte_count, bigBlockSize); + } + _bat_count = getInt(_bat_count_offset, _data); _property_start = getInt(_property_start_offset, _data); _sbat_start = getInt(_sbat_start_offset, _data); _xbat_start = getInt(_xbat_start_offset, _data); _xbat_count = getInt(_xbat_count_offset, _data); } - + private static int getInt(int offset, byte[] data) { return LittleEndian.getInt(data, offset); } @@ -136,7 +144,7 @@ public final class HeaderBlockReader { return new String(HexDump.longToHex(value)); } - private void alertShortRead(int pRead) throws IOException { + private static IOException alertShortRead(int pRead, int expectedReadSize) { int read; if (pRead < 0) { //Can't have -1 bytes read in the error message! @@ -146,9 +154,9 @@ public final class HeaderBlockReader { } String type = " byte" + (read == 1 ? (""): ("s")); - throw new IOException("Unable to read entire header; " + return new IOException("Unable to read entire header; " + read + type + " read; expected " - + bigBlockSize + " bytes"); + + expectedReadSize + " bytes"); } /** @@ -201,7 +209,7 @@ public final class HeaderBlockReader { public int getXBATIndex() { return _xbat_start; } - + /** * @return The Big Block size, normally 512 bytes, sometimes 4096 bytes */ @@ -209,4 +217,3 @@ public final class HeaderBlockReader { return bigBlockSize; } } - diff --git a/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java b/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java index 51e1fc40b..9acef41df 100644 --- a/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java +++ b/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java @@ -20,10 +20,13 @@ package org.apache.poi.poifs.storage; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.Arrays; +import junit.framework.AssertionFailedError; import junit.framework.TestCase; import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.util.HexRead; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianConsts; @@ -225,6 +228,7 @@ public final class TestBlockAllocationTableReader extends TestCase { small_blocks.remove(j); fail("removing block " + j + " should have failed"); } catch (IOException ignored) { + // expected during successful test } } } @@ -373,4 +377,45 @@ public final class TestBlockAllocationTableReader extends TestCase { } } } + + /** + * Bugzilla 48085 describes an error where a corrupted Excel file causes POI to throw an + * {@link OutOfMemoryError}. + */ + public void testBadSectorAllocationTableSize_bug48085() { + int BLOCK_SIZE = 512; + // 512 bytes take from the start of bugzilla attachment 24444 + byte[] initData = HexRead.readFromString( + + "D0 CF 11 E0 A1 B1 1A E1 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 3E 20 03 20 FE FF 09 20" + + "06 20 20 20 20 20 20 20 20 20 20 20 01 20 20 20 01 20 20 20 20 20 20 20 20 10 20 20 02 20 20 20" + + "02 20 20 20 FE FF FF FF 20 20 20 20 20 20 20 20 " + ); + // the rest of the block is 'FF' + byte[] data = new byte[BLOCK_SIZE]; + Arrays.fill(data, (byte)0xFF); + System.arraycopy(initData, 0, data, 0, initData.length); + + // similar code to POIFSFileSystem.: + InputStream stream = new ByteArrayInputStream(data); + HeaderBlockReader hb; + RawDataBlockList dataBlocks; + try { + hb = new HeaderBlockReader(stream); + dataBlocks = new RawDataBlockList(stream, BLOCK_SIZE); + } catch (IOException e) { + throw new RuntimeException(e); + } + try { + new BlockAllocationTableReader(hb.getBATCount(), hb.getBATArray(), hb.getXBATCount(), + hb.getXBATIndex(), dataBlocks); + } catch (IOException e) { + // expected during successful test + assertEquals("Block count 538976257 is too high. POI maximum is 65535.", e.getMessage()); + } catch (OutOfMemoryError e) { + if (e.getStackTrace()[1].getMethodName().equals("testBadSectorAllocationTableSize")) { + throw new AssertionFailedError("Identified bug 48085"); + } + } + } }