diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index ff277d20d..a0e36343a 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list 46184 - More odd escaped date formats Include the sheet number in the output of XLS2CSVmra 46043 - correctly write out HPSF properties with HWPF diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 18922b442..4d2b1a997 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list 46184 - More odd escaped date formats Include the sheet number in the output of XLS2CSVmra 46043 - correctly write out HPSF properties with HWPF diff --git a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java index 73911e6b0..6a1423ea4 100644 --- a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java +++ b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java @@ -245,13 +245,13 @@ public class POIFSReader { document = new POIFSDocument(name, small_blocks - .fetchBlocks(startBlock), size); + .fetchBlocks(startBlock, -1), size); } else { document = new POIFSDocument(name, big_blocks - .fetchBlocks(startBlock), size); + .fetchBlocks(startBlock, -1), size); } while (listeners.hasNext()) { @@ -270,11 +270,11 @@ public class POIFSReader // consume the document's data and discard it if (property.shouldUseSmallBlocks()) { - small_blocks.fetchBlocks(startBlock); + small_blocks.fetchBlocks(startBlock, -1); } else { - big_blocks.fetchBlocks(startBlock); + big_blocks.fetchBlocks(startBlock, -1); } } } diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java index 92348ceaf..6ea37ea38 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java @@ -173,11 +173,16 @@ public class POIFSFileSystem data_blocks); // init documents - processProperties(SmallBlockTableReader - .getSmallDocumentBlocks(data_blocks, properties - .getRoot(), header_block_reader - .getSBATStart()), data_blocks, properties.getRoot() - .getChildren(), null); + processProperties( + SmallBlockTableReader.getSmallDocumentBlocks( + data_blocks, properties.getRoot(), + header_block_reader.getSBATStart() + ), + data_blocks, + properties.getRoot().getChildren(), + null, + header_block_reader.getPropertyStart() + ); } /** * @param stream the stream to be closed @@ -491,7 +496,8 @@ public class POIFSFileSystem private void processProperties(final BlockList small_blocks, final BlockList big_blocks, final Iterator properties, - final DirectoryNode dir) + final DirectoryNode dir, + final int headerPropertiesStartAt) throws IOException { while (properties.hasNext()) @@ -511,7 +517,8 @@ public class POIFSFileSystem processProperties( small_blocks, big_blocks, - (( DirectoryProperty ) property).getChildren(), new_dir); + (( DirectoryProperty ) property).getChildren(), + new_dir, headerPropertiesStartAt); } else { @@ -522,14 +529,15 @@ public class POIFSFileSystem if (property.shouldUseSmallBlocks()) { document = - new POIFSDocument(name, small_blocks - .fetchBlocks(startBlock), size); + new POIFSDocument(name, + small_blocks.fetchBlocks(startBlock, headerPropertiesStartAt), + size); } else { document = new POIFSDocument(name, - big_blocks.fetchBlocks(startBlock), + big_blocks.fetchBlocks(startBlock, headerPropertiesStartAt), size); } parent.createDocument(document); diff --git a/src/java/org/apache/poi/poifs/property/PropertyTable.java b/src/java/org/apache/poi/poifs/property/PropertyTable.java index 11c56e886..00b306b79 100644 --- a/src/java/org/apache/poi/poifs/property/PropertyTable.java +++ b/src/java/org/apache/poi/poifs/property/PropertyTable.java @@ -78,7 +78,7 @@ public class PropertyTable _blocks = null; _properties = PropertyFactory - .convertToProperties(blockList.fetchBlocks(startBlock)); + .convertToProperties(blockList.fetchBlocks(startBlock, -1)); populatePropertyTree(( DirectoryProperty ) _properties.get(0)); } diff --git a/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java b/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java index b167d2fca..1016db9c4 100644 --- a/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java +++ b/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java @@ -179,17 +179,34 @@ public class BlockAllocationTableReader */ ListManagedBlock [] fetchBlocks(final int startBlock, + final int headerPropertiesStartBlock, final BlockList blockList) throws IOException { List blocks = new ArrayList(); int currentBlock = startBlock; - - while (currentBlock != POIFSConstants.END_OF_CHAIN) - { - blocks.add(blockList.remove(currentBlock)); - currentBlock = _entries.get(currentBlock); + boolean firstPass = true; + + // Process the chain from the start to the end + // Normally we have header, data, end + // Sometimes we have data, header, end + // For those cases, stop at the header, not the end + while (currentBlock != POIFSConstants.END_OF_CHAIN) { + try { + blocks.add(blockList.remove(currentBlock)); + currentBlock = _entries.get(currentBlock); + } catch(IOException e) { + if(currentBlock == headerPropertiesStartBlock) { + // Special case where things are in the wrong order + System.err.println("Warning, header block comes after data blocks in POIFS block listing"); + currentBlock = POIFSConstants.END_OF_CHAIN; + } else { + // Ripple up + throw e; + } + } } + return ( ListManagedBlock [] ) blocks .toArray(new ListManagedBlock[ 0 ]); } diff --git a/src/java/org/apache/poi/poifs/storage/BlockList.java b/src/java/org/apache/poi/poifs/storage/BlockList.java index 2352d3a2c..a5eb7df21 100644 --- a/src/java/org/apache/poi/poifs/storage/BlockList.java +++ b/src/java/org/apache/poi/poifs/storage/BlockList.java @@ -59,13 +59,14 @@ public interface BlockList * blocks are removed from the list. * * @param startBlock the index of the first block in the stream + * @param headerPropertiesStartBlock the index of the first header block in the stream * * @return the stream as an array of correctly ordered blocks * * @exception IOException if blocks are missing */ - public ListManagedBlock [] fetchBlocks(final int startBlock) + public ListManagedBlock [] fetchBlocks(final int startBlock, final int headerPropertiesStartBlock) throws IOException; /** diff --git a/src/java/org/apache/poi/poifs/storage/BlockListImpl.java b/src/java/org/apache/poi/poifs/storage/BlockListImpl.java index 7e44fda3f..f315b5d51 100644 --- a/src/java/org/apache/poi/poifs/storage/BlockListImpl.java +++ b/src/java/org/apache/poi/poifs/storage/BlockListImpl.java @@ -94,8 +94,10 @@ class BlockListImpl result = _blocks[ index ]; if (result == null) { - throw new IOException("block[ " + index - + " ] already removed"); + throw new IOException( + "block[ " + index + " ] already removed - " + + "does your POIFS have circular or duplicate block references?" + ); } _blocks[ index ] = null; } @@ -119,7 +121,7 @@ class BlockListImpl * @exception IOException if blocks are missing */ - public ListManagedBlock [] fetchBlocks(final int startBlock) + public ListManagedBlock [] fetchBlocks(final int startBlock, final int headerPropertiesStartBlock) throws IOException { if (_bat == null) @@ -127,7 +129,7 @@ class BlockListImpl throw new IOException( "Improperly initialized list: no block allocation table provided"); } - return _bat.fetchBlocks(startBlock, this); + return _bat.fetchBlocks(startBlock, headerPropertiesStartBlock, this); } /** diff --git a/src/java/org/apache/poi/poifs/storage/SmallBlockTableReader.java b/src/java/org/apache/poi/poifs/storage/SmallBlockTableReader.java index e70774aa1..4da79e71f 100644 --- a/src/java/org/apache/poi/poifs/storage/SmallBlockTableReader.java +++ b/src/java/org/apache/poi/poifs/storage/SmallBlockTableReader.java @@ -56,9 +56,9 @@ public class SmallBlockTableReader { BlockList list = new SmallDocumentBlockList(SmallDocumentBlock - .extract(blockList.fetchBlocks(root.getStartBlock()))); + .extract(blockList.fetchBlocks(root.getStartBlock(), -1))); - new BlockAllocationTableReader(blockList.fetchBlocks(sbatStart), + new BlockAllocationTableReader(blockList.fetchBlocks(sbatStart, -1), list); return list; } diff --git a/src/testcases/org/apache/poi/hssf/data/45290.xls b/src/testcases/org/apache/poi/hssf/data/45290.xls new file mode 100644 index 000000000..dd064497a Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/45290.xls differ diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java b/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java index 024f94e5b..402ee797a 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java @@ -27,9 +27,6 @@ import java.util.List; import junit.framework.AssertionFailedError; import junit.framework.TestCase; -import org.apache.poi.ss.util.Region; -import org.apache.poi.ss.util.CellRangeAddress; - import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.model.Workbook; import org.apache.poi.hssf.record.CellValueRecordInterface; @@ -38,6 +35,7 @@ import org.apache.poi.hssf.record.NameRecord; import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate; import org.apache.poi.hssf.record.formula.DeletedArea3DPtg; import org.apache.poi.hssf.record.formula.Ptg; +import org.apache.poi.ss.util.CellRangeAddress; import org.apache.poi.util.TempFile; /** @@ -1533,4 +1531,13 @@ public final class TestBugs extends TestCase { assertEquals(7, wb.getNumberOfSheets()); wb = HSSFTestDataSamples.writeOutAndReadBack(wb); } + + /** + * Odd POIFS blocks issue: + * block[ 44 ] already removed from org.apache.poi.poifs.storage.BlockListImpl.remove + */ + public void test45290() { + HSSFWorkbook wb = openSample("45290.xls"); + assertEquals(1, wb.getNumberOfSheets()); + } } diff --git a/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java b/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java index a209de6c8..d688ce145 100644 --- a/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java +++ b/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java @@ -1325,7 +1325,7 @@ public class TestBlockAllocationTableReader try { ListManagedBlock[] dataBlocks = - table.fetchBlocks(start_blocks[ j ], list); + table.fetchBlocks(start_blocks[ j ], -1, list); if (expected_length[ j ] == -1) { diff --git a/src/testcases/org/apache/poi/poifs/storage/TestBlockListImpl.java b/src/testcases/org/apache/poi/poifs/storage/TestBlockListImpl.java index 6b8d091a5..194a951a1 100644 --- a/src/testcases/org/apache/poi/poifs/storage/TestBlockListImpl.java +++ b/src/testcases/org/apache/poi/poifs/storage/TestBlockListImpl.java @@ -285,7 +285,7 @@ public class TestBlockListImpl try { ListManagedBlock[] dataBlocks = - list.fetchBlocks(start_blocks[ j ]); + list.fetchBlocks(start_blocks[ j ], -1); if (expected_length[ j ] == -1) {