From cf20f3397fc733e9efc0200389cbce758e5bac5c Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 12 Nov 2008 18:25:33 +0000 Subject: [PATCH] Fix bug #45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@713447 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../poifs/eventfilesystem/POIFSReader.java | 8 ++--- .../poi/poifs/filesystem/POIFSFileSystem.java | 28 +++++++++++------- .../poi/poifs/property/PropertyTable.java | 2 +- .../storage/BlockAllocationTableReader.java | 27 +++++++++++++---- .../apache/poi/poifs/storage/BlockList.java | 3 +- .../poi/poifs/storage/BlockListImpl.java | 10 ++++--- .../poifs/storage/SmallBlockTableReader.java | 4 +-- .../org/apache/poi/hssf/data/45290.xls | Bin 0 -> 23552 bytes .../apache/poi/hssf/usermodel/TestBugs.java | 13 ++++++-- .../TestBlockAllocationTableReader.java | 2 +- .../poi/poifs/storage/TestBlockListImpl.java | 2 +- 13 files changed, 69 insertions(+), 32 deletions(-) create mode 100644 src/testcases/org/apache/poi/hssf/data/45290.xls diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index ff277d20d..a0e36343a 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list 46184 - More odd escaped date formats Include the sheet number in the output of XLS2CSVmra 46043 - correctly write out HPSF properties with HWPF diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 18922b442..4d2b1a997 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list 46184 - More odd escaped date formats Include the sheet number in the output of XLS2CSVmra 46043 - correctly write out HPSF properties with HWPF diff --git a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java index 73911e6b0..6a1423ea4 100644 --- a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java +++ b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java @@ -245,13 +245,13 @@ public class POIFSReader { document = new POIFSDocument(name, small_blocks - .fetchBlocks(startBlock), size); + .fetchBlocks(startBlock, -1), size); } else { document = new POIFSDocument(name, big_blocks - .fetchBlocks(startBlock), size); + .fetchBlocks(startBlock, -1), size); } while (listeners.hasNext()) { @@ -270,11 +270,11 @@ public class POIFSReader // consume the document's data and discard it if (property.shouldUseSmallBlocks()) { - small_blocks.fetchBlocks(startBlock); + small_blocks.fetchBlocks(startBlock, -1); } else { - big_blocks.fetchBlocks(startBlock); + big_blocks.fetchBlocks(startBlock, -1); } } } diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java index 92348ceaf..6ea37ea38 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java @@ -173,11 +173,16 @@ public class POIFSFileSystem data_blocks); // init documents - processProperties(SmallBlockTableReader - .getSmallDocumentBlocks(data_blocks, properties - .getRoot(), header_block_reader - .getSBATStart()), data_blocks, properties.getRoot() - .getChildren(), null); + processProperties( + SmallBlockTableReader.getSmallDocumentBlocks( + data_blocks, properties.getRoot(), + header_block_reader.getSBATStart() + ), + data_blocks, + properties.getRoot().getChildren(), + null, + header_block_reader.getPropertyStart() + ); } /** * @param stream the stream to be closed @@ -491,7 +496,8 @@ public class POIFSFileSystem private void processProperties(final BlockList small_blocks, final BlockList big_blocks, final Iterator properties, - final DirectoryNode dir) + final DirectoryNode dir, + final int headerPropertiesStartAt) throws IOException { while (properties.hasNext()) @@ -511,7 +517,8 @@ public class POIFSFileSystem processProperties( small_blocks, big_blocks, - (( DirectoryProperty ) property).getChildren(), new_dir); + (( DirectoryProperty ) property).getChildren(), + new_dir, headerPropertiesStartAt); } else { @@ -522,14 +529,15 @@ public class POIFSFileSystem if (property.shouldUseSmallBlocks()) { document = - new POIFSDocument(name, small_blocks - .fetchBlocks(startBlock), size); + new POIFSDocument(name, + small_blocks.fetchBlocks(startBlock, headerPropertiesStartAt), + size); } else { document = new POIFSDocument(name, - big_blocks.fetchBlocks(startBlock), + big_blocks.fetchBlocks(startBlock, headerPropertiesStartAt), size); } parent.createDocument(document); diff --git a/src/java/org/apache/poi/poifs/property/PropertyTable.java b/src/java/org/apache/poi/poifs/property/PropertyTable.java index 11c56e886..00b306b79 100644 --- a/src/java/org/apache/poi/poifs/property/PropertyTable.java +++ b/src/java/org/apache/poi/poifs/property/PropertyTable.java @@ -78,7 +78,7 @@ public class PropertyTable _blocks = null; _properties = PropertyFactory - .convertToProperties(blockList.fetchBlocks(startBlock)); + .convertToProperties(blockList.fetchBlocks(startBlock, -1)); populatePropertyTree(( DirectoryProperty ) _properties.get(0)); } diff --git a/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java b/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java index b167d2fca..1016db9c4 100644 --- a/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java +++ b/src/java/org/apache/poi/poifs/storage/BlockAllocationTableReader.java @@ -179,17 +179,34 @@ public class BlockAllocationTableReader */ ListManagedBlock [] fetchBlocks(final int startBlock, + final int headerPropertiesStartBlock, final BlockList blockList) throws IOException { List blocks = new ArrayList(); int currentBlock = startBlock; - - while (currentBlock != POIFSConstants.END_OF_CHAIN) - { - blocks.add(blockList.remove(currentBlock)); - currentBlock = _entries.get(currentBlock); + boolean firstPass = true; + + // Process the chain from the start to the end + // Normally we have header, data, end + // Sometimes we have data, header, end + // For those cases, stop at the header, not the end + while (currentBlock != POIFSConstants.END_OF_CHAIN) { + try { + blocks.add(blockList.remove(currentBlock)); + currentBlock = _entries.get(currentBlock); + } catch(IOException e) { + if(currentBlock == headerPropertiesStartBlock) { + // Special case where things are in the wrong order + System.err.println("Warning, header block comes after data blocks in POIFS block listing"); + currentBlock = POIFSConstants.END_OF_CHAIN; + } else { + // Ripple up + throw e; + } + } } + return ( ListManagedBlock [] ) blocks .toArray(new ListManagedBlock[ 0 ]); } diff --git a/src/java/org/apache/poi/poifs/storage/BlockList.java b/src/java/org/apache/poi/poifs/storage/BlockList.java index 2352d3a2c..a5eb7df21 100644 --- a/src/java/org/apache/poi/poifs/storage/BlockList.java +++ b/src/java/org/apache/poi/poifs/storage/BlockList.java @@ -59,13 +59,14 @@ public interface BlockList * blocks are removed from the list. * * @param startBlock the index of the first block in the stream + * @param headerPropertiesStartBlock the index of the first header block in the stream * * @return the stream as an array of correctly ordered blocks * * @exception IOException if blocks are missing */ - public ListManagedBlock [] fetchBlocks(final int startBlock) + public ListManagedBlock [] fetchBlocks(final int startBlock, final int headerPropertiesStartBlock) throws IOException; /** diff --git a/src/java/org/apache/poi/poifs/storage/BlockListImpl.java b/src/java/org/apache/poi/poifs/storage/BlockListImpl.java index 7e44fda3f..f315b5d51 100644 --- a/src/java/org/apache/poi/poifs/storage/BlockListImpl.java +++ b/src/java/org/apache/poi/poifs/storage/BlockListImpl.java @@ -94,8 +94,10 @@ class BlockListImpl result = _blocks[ index ]; if (result == null) { - throw new IOException("block[ " + index - + " ] already removed"); + throw new IOException( + "block[ " + index + " ] already removed - " + + "does your POIFS have circular or duplicate block references?" + ); } _blocks[ index ] = null; } @@ -119,7 +121,7 @@ class BlockListImpl * @exception IOException if blocks are missing */ - public ListManagedBlock [] fetchBlocks(final int startBlock) + public ListManagedBlock [] fetchBlocks(final int startBlock, final int headerPropertiesStartBlock) throws IOException { if (_bat == null) @@ -127,7 +129,7 @@ class BlockListImpl throw new IOException( "Improperly initialized list: no block allocation table provided"); } - return _bat.fetchBlocks(startBlock, this); + return _bat.fetchBlocks(startBlock, headerPropertiesStartBlock, this); } /** diff --git a/src/java/org/apache/poi/poifs/storage/SmallBlockTableReader.java b/src/java/org/apache/poi/poifs/storage/SmallBlockTableReader.java index e70774aa1..4da79e71f 100644 --- a/src/java/org/apache/poi/poifs/storage/SmallBlockTableReader.java +++ b/src/java/org/apache/poi/poifs/storage/SmallBlockTableReader.java @@ -56,9 +56,9 @@ public class SmallBlockTableReader { BlockList list = new SmallDocumentBlockList(SmallDocumentBlock - .extract(blockList.fetchBlocks(root.getStartBlock()))); + .extract(blockList.fetchBlocks(root.getStartBlock(), -1))); - new BlockAllocationTableReader(blockList.fetchBlocks(sbatStart), + new BlockAllocationTableReader(blockList.fetchBlocks(sbatStart, -1), list); return list; } diff --git a/src/testcases/org/apache/poi/hssf/data/45290.xls b/src/testcases/org/apache/poi/hssf/data/45290.xls new file mode 100644 index 0000000000000000000000000000000000000000..dd064497a0e8b64cef7e7dfd87c7d4c9e5d285a9 GIT binary patch literal 23552 zcmeI4dvIM>b;kF-dRtcXtgkIu^40sD=w(UHQ?lf@{HUbF5iyt$D~Yiahr}UC+Ymzo z;gLcJeWp-K2$Vjc(3TJig(QV(J7uP%Fq7%fp#l1u=|7zz3@y{(e&1T>?0eQZR}VXp z?RL0#j_zISoO{l1uf6u#YwvTmzWI06FMaHD_1|-`*R`&|{q$^^D~{+>_}&-FRSx^l zo~0Om@;g*Nd;K4>z$;))quvUz6=EyG#yT&-R*J0*+d6FJ*w$mKz;+q7N^EceSB1GWsdjo3C}tH;)WZ8Nq;Y)#m&DiD ztrr`8kG}S^*K22ivQpGyagqB{#Wz^xQu#HG9&ruo^?tm!Ts7Y|F1u3h#`GX+^RFST zbB{3`;8m2Gba%Vs?q+w=t?k}V;A&?dTI#yoTb7rqscG>d zp>U+TsjwM75AO_B*niXP;<4A;gIdri;J)EOmk-$UK<24B(QM$*_Nds|f2Cqo(XNT3xXR^d7-$+tyQ07A$wq(FlQqA%4$bFEdoeA6 zy`G>&E_vN`xr>gX6%Ix&8vfp2H~g_&I{M3A_Y@*T>H(+kes_oKcgGL_ZU@sHcxTWS<`))kf(vEGZ|Kq=P#vw_~N3-ml5`mg2m zA4(Uxsxt*=`B{CYSUzjc6v}7qnG*SQXPkW2ohdr|I6AZf_w8>otm*5W&@>J6d$F6l zeR1*h;QzSl?AIZ4TkjC@?sd1g(};Sf+?_DrG@{>Ww}>7zH4h)Z3tx*}Y6^Sr#s1S! zVb21zHzQQyEB73OSn5)TM0X2>lQ>Z;I@Ht*gxjE|VjAD?!L=A+@A6kHcd30i@hNwo zTz&>S7xC>L*eP{Tl#BR9iA(Lr9s+L1=P~Y`$9eCUbEaKt&W(^}I~w1OZk}I<+o^+eAbR}N06qpFaNjmJino3rKtKTPLHsuBwz^@t(5%~o^Ub;`H-m3G@y@zAw*zt( zKhEN)Avy0h`8x#7q?C`wxea7jLNkDTBefSN-T`4hHlBb-x_90L@PMHUr&4 zuG}N>a*yDWxpMDVx!g4ET9lxR+Ba>iXAlyZ+~Uk7Qg=;l$<1=!#qj+e3y zN_&l!brf!O4z)Rk(_e_%97L@kkRFzjOv&GA)ZYQT`FEF;au#}DvLRcGgJ>`e8)5rg zu@)KGRw@}IAKgjA_W86<*5WwESE*TdJx;V6r`so`y8$&ijW=tT(MaoeQhe8!F&2Xc z;Si&6{C;dB_`YHcVt8FmDcKLano=rGjZ5te{BIH`WSn7dz?j3%ifu59;i4~HEYi>g z#sM7G;3`TtD5qaO)(pT@OOeiCkss_vx#*!R=PqfHEG_#&?qiKEh;|m51-Oa!t8=-U9gmlJ8+`W?^xhF<2P5=e zrgIEu{5*u(JBYum#d&zK);A+9W3PCx=6t62U`jHMIC~B)lownn%DxsYbO`PSR%U#& zs86;_mU0p$qOUG6lV-xn{CL3D9Dh0x={Z*6t)myDRKv(Yw_;8%$n0iOiZIU5SGS`E zwSQ#W+>25kkXRdJIxK6fkK|Z^eaIzPe+(CxZ(y`xe{o29%I(quHOkINFS!@B8}ZLr z|H$%4d+TAd`a&$G>414mmk0y$Ph#Bn*HSgwVgBj@*%B5LbtiEDB zAuFDl={b1hg(;(&9vOEyX5S%gjsD53G;i5rJsyW(%tz0mY!@bO(y@jyJB+fiUzPIHJZ>?d^O%P}IyRPlPWBYxjL0tZmO+UbR${Y9EN67|e+l)CV*o=Y$7sWh?RqJ3u8uhl z*4ZD;%sCq{FGn)^`s|fuPV|ynQSq0cU$eQ5vCKPA+Fd9EX9YNm!?`ZzIh?*@&4zh9 zwXA2_g5%8~V`rI7TV_hc_ZW8Qw?mxfoX3w3!y~SMg>)Fl?~G4lAnC;Cjo3>U;rEr| zGP9Yb#$n6NbS38+FUK_pIjPGrCFdIH@fx@24bwWSH>8~?%QM96Pp29a+20_rqS@b~ zOYwy9dAiq*o+I+y?A5PKQ59VF=lYiZ*6RZd0O_pI#bK+S6f!im5pBiEii>8et!vokFi$uoy)o@{5bZAitsZZi*tmhV8AVLLXto(& z+qSkHwwb}3)|rmPB+eJJl`+?uMhp+`X=q!EjXQYPZv}d$D2k^XF$1*6+$z zK|eW?Mo?DT|J-Br>a29N`$h$F zqOgT&raXnqaW-@Y7pBFu_3goT85gNH#rk{BS8yshw9^tf5%ZS(t!ueE4=c4UrmSqY z=VK+kW3qfs)S#m(j)j>sYLCU9jI#r@6vtbPPCDA+o%J2!XWpw~y&3P!jYrzv1+^AO zO^hpRWi8C=S{I-=#C{dgagc{aIYsCJ9>a-0q2V*nSW6*8YZ-&m#N>E&A? zX2b2+EjrT?#&Y9TW=zj9E#JA>{tMdb=+5l7&d00h=PP9a=&1Cb73_%!%`l? z7?3YhXNtkb9)Ct?D+Zg4++!eDJt%9a zocj)!QsG6J$L`^hsz6TT$f#TZ<45H}47a0l5pG9BWk~aWa%HDz84)npxet2HxcMq?zoQY%;Gw(JB#a3 z2?-q+2gHx#qNTWw>(=5ru49YqbX}WeW*Othv9jViu7iu~Xsfu6>*V4(S}?BTI=Z-y z>+0e*(Kc9oO;2bzIjM*KwU+T*q~PaUItI#&uj5 z7}s%~U{v>ttVYyzhR&&!;`3-YPNg&?dY(!Jkmz$N6+)u7sZ<1sex_0}Bzl-iC6MS_ zDwRT_SE*D6iTv7{a>vs@eC5P@L_-gehOkHovp55p*t=w0vzJKN0*Qo0p0G#= zxrUd7MG3+pA^g^NUn;Nt;k$*9EzJ_ruX8pmHiWFNTrJZ!8Cfk%LULqe-LOg|><9>p zJz=pBb_RsS3Buw8AzQyC#Cj^6n>t>p#1NJk!V)3u@<)=8E1C6oByDA!Bx~mKoV{@sP17=Z9>+84L)NiESl=;bSO~ za1;`4NN>?vrmZs=W-8&j1R=ApjEmMXBeNyM+Bh6T9qX>#5SAOlav>b|N0M-xmq>^u z!9rN>3Co4>D(Gp$@&sXdf{^)@i&8ETqu!nCnORwuBabgCWO17Cn4L4 zZ7G)t;mfal`~KrEzHfrZFc-0em4>j=5LOz(N+H}GFjWfS9xst_FC-FHdcsN}+y}j0 zJx*9DgyXkOA3wFI{W9|-ONe<6oSQnmp7raEm*Fhp-ttYG%!keHc zVQqr2HbKZN*b-u3#PHwxj&fN7%;-r*&3-3f_=8$IDhAv^^=2{$GPHwxj2C+_?3lP~BffMYWy zbOS$_V7+b2 zyT136e|&g?$8fx83Gr+m&K(lg8^U@+ST8nwYrs@5HoVVEB)lII3F|#!y%0VCy(iBy9AA zjY9ZN=tv`3Xyi{~{5w;TA)<#Sm^0!uJMDTZHg^ULxV6kVv@26K)a0UxA*4TM~p@4B?B& zkDmeF$@CNM$yf2_{C5`DjLu0?*PKVkd6M{Xt$B29d35c0bez?QpRY5It}BnOJCCj> zkFGb5t}lUxh@%W>45G zguezo37Zo>+H8E3$IxdiVT&PbF@!CKutf-eJz#1P!r$-`34aq33Hh(gG&5@v!ry{k zuO7Eyix8gS5aB-UZ|K365dUfiXA678Rzuio2wR2liGZn92!Gp4B>XTW61IB6Rw4Wd z^dxLe*sxUyqx}urlO=33gl&ef%@DQ;;qL@YZ9@1_FOl$fA(61n6SfKA$Dk)+TY|7n z2ygxIKi}N!w7+5NwuJ45u-y>08$v!ZlR*U2meMYSANLXopMpffc2C$Ygr9((gzX8! zb|I`R|NJK(_|JDu@EFD#OW0uuI}BlmA?y&sPXUTf#0w*kuU23}KfLekNe*62d?B5(z&GiG*FAuuBL(2R#Y95`+-(TE4Pmzs{z<^pErfsSB@+G_BocOe!fqk_JoF^&ju75Cp&E_@ zyN`U}`5*n0&X}-|vV?dJEkf912zv}+j}Sf`F!c!G7raEmXCRTV#}oDl;TNGNPfvvK zxDfV;4ZHug>aVW+H*G2GF)bnfDO!ZE*AVs^!d@Z#^MI*W2*2bd5`GyH341+ZuMqwP z^d#(!5bE8aULmZ1_NMG}vpR-h|7{8T3}K%k>@$RYLim+{sZR*M>Ln6B3yFk%p0G~{ zpM#!+eGx+Kh5Ll?TVMV`UE?Ef5vwwru!Q}Fu-_2&8^V4ed>&_D=Fl&Mf9WL>ehm@{ z`#oX5A?!~O_8Y<%k#oFUhX0S7S~_#QHeKVMF2;r21pfrDb>Wvw_H$O&d!9xKcVI++ z8slk>Jy-wQ0DFi3jW)H)pGE&)^kH28Zg&E6MqGD#VwGpe9ftYHRCs-U=JqW8O*n5@ VdjGlZIoIo->-kqY{?%One*xK~)SdtU literal 0 HcmV?d00001 diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java b/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java index 024f94e5b..402ee797a 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java @@ -27,9 +27,6 @@ import java.util.List; import junit.framework.AssertionFailedError; import junit.framework.TestCase; -import org.apache.poi.ss.util.Region; -import org.apache.poi.ss.util.CellRangeAddress; - import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.model.Workbook; import org.apache.poi.hssf.record.CellValueRecordInterface; @@ -38,6 +35,7 @@ import org.apache.poi.hssf.record.NameRecord; import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate; import org.apache.poi.hssf.record.formula.DeletedArea3DPtg; import org.apache.poi.hssf.record.formula.Ptg; +import org.apache.poi.ss.util.CellRangeAddress; import org.apache.poi.util.TempFile; /** @@ -1533,4 +1531,13 @@ public final class TestBugs extends TestCase { assertEquals(7, wb.getNumberOfSheets()); wb = HSSFTestDataSamples.writeOutAndReadBack(wb); } + + /** + * Odd POIFS blocks issue: + * block[ 44 ] already removed from org.apache.poi.poifs.storage.BlockListImpl.remove + */ + public void test45290() { + HSSFWorkbook wb = openSample("45290.xls"); + assertEquals(1, wb.getNumberOfSheets()); + } } diff --git a/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java b/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java index a209de6c8..d688ce145 100644 --- a/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java +++ b/src/testcases/org/apache/poi/poifs/storage/TestBlockAllocationTableReader.java @@ -1325,7 +1325,7 @@ public class TestBlockAllocationTableReader try { ListManagedBlock[] dataBlocks = - table.fetchBlocks(start_blocks[ j ], list); + table.fetchBlocks(start_blocks[ j ], -1, list); if (expected_length[ j ] == -1) { diff --git a/src/testcases/org/apache/poi/poifs/storage/TestBlockListImpl.java b/src/testcases/org/apache/poi/poifs/storage/TestBlockListImpl.java index 6b8d091a5..194a951a1 100644 --- a/src/testcases/org/apache/poi/poifs/storage/TestBlockListImpl.java +++ b/src/testcases/org/apache/poi/poifs/storage/TestBlockListImpl.java @@ -285,7 +285,7 @@ public class TestBlockListImpl try { ListManagedBlock[] dataBlocks = - list.fetchBlocks(start_blocks[ j ]); + list.fetchBlocks(start_blocks[ j ], -1); if (expected_length[ j ] == -1) {