From 0afe659835de41748436b7e0814b58f06bdf8a01 Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Wed, 1 Oct 2008 16:14:39 +0000 Subject: [PATCH] iterator over sheet data in XSSFReader returns sheets in logical order, i.e. as they are defined in workbook.xml (was in physical order, as they were stored in the relationship table) git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@700821 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/xssf/eventusermodel/XSSFReader.java | 279 +++++++++++------- .../xssf/eventusermodel/TestXSSFReader.java | 26 ++ .../poi/hssf/data/reordered_sheets.xlsx | Bin 0 -> 8731 bytes 3 files changed, 197 insertions(+), 108 deletions(-) create mode 100755 src/testcases/org/apache/poi/hssf/data/reordered_sheets.xlsx diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java index 181913f26..f0a84d160 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java @@ -18,20 +18,24 @@ package org.apache.poi.xssf.eventusermodel; import java.io.IOException; import java.io.InputStream; -import java.util.Iterator; +import java.util.*; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFRelation; +import org.apache.poi.POIXMLException; +import org.apache.xmlbeans.XmlException; import org.openxml4j.exceptions.InvalidFormatException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; import org.openxml4j.opc.PackagePart; import org.openxml4j.opc.PackagePartName; import org.openxml4j.opc.PackageRelationship; -import org.openxml4j.opc.PackageRelationshipCollection; import org.openxml4j.opc.PackageRelationshipTypes; import org.openxml4j.opc.PackagingURIHelper; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; /** * This class makes it easy to get at individual parts @@ -41,127 +45,186 @@ import org.openxml4j.opc.PackagingURIHelper; * for XSSF. */ public class XSSFReader { - private Package pkg; - private PackagePart workbookPart; - - /** - * Creates a new XSSFReader, for the given package - */ - public XSSFReader(Package pkg) throws IOException, OpenXML4JException { - this.pkg = pkg; - + private Package pkg; + private PackagePart workbookPart; + + /** + * Creates a new XSSFReader, for the given package + */ + public XSSFReader(Package pkg) throws IOException, OpenXML4JException { + this.pkg = pkg; + PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType( PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0); - + // Get the part that holds the workbook workbookPart = this.pkg.getPart(coreDocRelationship); - } + } - - /** - * Opens up the Shared Strings Table, parses it, and - * returns a handy object for working with - * shared strings. - */ - public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException { - return new SharedStringsTable(getSharedStringsData()); - } - - /** - * Opens up the Styles Table, parses it, and - * returns a handy object for working with cell styles - */ - public StylesTable getStylesTable() throws IOException, InvalidFormatException { - return new StylesTable(getStylesData()); - } - - - /** - * Returns an InputStream to read the contents of the - * shared strings table. - */ - public InputStream getSharedStringsData() throws IOException, InvalidFormatException { - return XSSFRelation.SHARED_STRINGS.getContents(workbookPart); - } - - /** - * Returns an InputStream to read the contents of the - * styles table. - */ - public InputStream getStylesData() throws IOException, InvalidFormatException { - return XSSFRelation.STYLES.getContents(workbookPart); - } - - /** - * Returns an InputStream to read the contents of the - * main Workbook, which contains key overall data for - * the file, including sheet definitions. - */ - public InputStream getWorkbookData() throws IOException, InvalidFormatException { - return workbookPart.getInputStream(); - } - - /** - * Returns an InputStream to read the contents of the - * specified Sheet. - * @param relId The relationId of the sheet, from a r:id on the workbook - */ - public InputStream getSheet(String relId) throws IOException, InvalidFormatException { + /** + * Opens up the Shared Strings Table, parses it, and + * returns a handy object for working with + * shared strings. + */ + public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException { + return new SharedStringsTable(getSharedStringsData()); + } + + /** + * Opens up the Styles Table, parses it, and + * returns a handy object for working with cell styles + */ + public StylesTable getStylesTable() throws IOException, InvalidFormatException { + return new StylesTable(getStylesData()); + } + + + + /** + * Returns an InputStream to read the contents of the + * shared strings table. + */ + public InputStream getSharedStringsData() throws IOException, InvalidFormatException { + return XSSFRelation.SHARED_STRINGS.getContents(workbookPart); + } + + /** + * Returns an InputStream to read the contents of the + * styles table. + */ + public InputStream getStylesData() throws IOException, InvalidFormatException { + return XSSFRelation.STYLES.getContents(workbookPart); + } + + /** + * Returns an InputStream to read the contents of the + * main Workbook, which contains key overall data for + * the file, including sheet definitions. + */ + public InputStream getWorkbookData() throws IOException, InvalidFormatException { + return workbookPart.getInputStream(); + } + + /** + * Returns an InputStream to read the contents of the + * specified Sheet. + * @param relId The relationId of the sheet, from a r:id on the workbook + */ + public InputStream getSheet(String relId) throws IOException, InvalidFormatException { PackageRelationship rel = workbookPart.getRelationship(relId); if(rel == null) { - throw new IllegalArgumentException("No Sheet found with r:id " + relId); + throw new IllegalArgumentException("No Sheet found with r:id " + relId); } - + PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); PackagePart sheet = pkg.getPart(relName); if(sheet == null) { - throw new IllegalArgumentException("No data found for Sheet with r:id " + relId); + throw new IllegalArgumentException("No data found for Sheet with r:id " + relId); } return sheet.getInputStream(); - } - - /** - * Returns an Iterator which will let you get at all the - * different Sheets in turn. - * Each sheet's InputStream is only opened when fetched - * from the Iterator. It's up to you to close the - * InputStreams when done with each one. - */ - public Iterator getSheetsData() throws IOException, InvalidFormatException { - return new SheetDataIterator(); - } - - private class SheetDataIterator implements Iterator { - private Iterator sheetRels; - private SheetDataIterator() throws IOException, InvalidFormatException { - // Find all the sheets - PackageRelationshipCollection sheets = - workbookPart.getRelationshipsByType( - XSSFRelation.WORKSHEET.getRelation() - ); - sheetRels = sheets.iterator(); - } + } - public boolean hasNext() { - return sheetRels.hasNext(); - } + /** + * Returns an Iterator which will let you get at all the + * different Sheets in turn. + * Each sheet's InputStream is only opened when fetched + * from the Iterator. It's up to you to close the + * InputStreams when done with each one. + */ + public Iterator getSheetsData() throws IOException, InvalidFormatException { + return new SheetIterator(workbookPart); + } - public InputStream next() { - PackageRelationship sheet = sheetRels.next(); - try { - PackagePartName relName = PackagingURIHelper.createPartName(sheet.getTargetURI()); - PackagePart sheetPkg = pkg.getPart(relName); - return sheetPkg.getInputStream(); - } catch(IOException e) { - throw new RuntimeException(e); - } catch(InvalidFormatException ife) { - throw new RuntimeException(ife); - } - } + /** + * Iterator over sheet data. + */ + public static class SheetIterator implements Iterator { - public void remove() { - throw new IllegalStateException("Not supported"); - } - } + /** + * Maps relId and the corresponding PackagePart + */ + private Map sheetMap; + + /** + * Current CTSheet bean + */ + private CTSheet ctSheet; + + /** + * Iterator over CTSheet objects, returns sheets in logical order. + * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order, + * i.e. as they are stored in the underlying package + */ + private Iterator sheetIterator; + + /** + * Construct a new SheetIterator + * + * @param wb package part holding workbook.xml + */ + private SheetIterator(PackagePart wb) throws IOException { + + /** + * The order of sheets is defined by the order of CTSheet elements in workbook.xml + */ + try { + //step 1. Map sheet's relationship Id and the corresponding PackagePart + sheetMap = new HashMap(); + for(PackageRelationship rel : wb.getRelationships()){ + if(rel.getRelationshipType().equals(XSSFRelation.WORKSHEET.getRelation())){ + PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); + sheetMap.put(rel.getId(), wb.getPackage().getPart(relName)); + } + } + //step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator + //Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search + CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream()).getWorkbook(); + sheetIterator = Arrays.asList(wbBean.getSheets().getSheetArray()).iterator(); + } catch (InvalidFormatException e){ + throw new POIXMLException(e); + } catch (XmlException e){ + throw new POIXMLException(e); + } + } + + /** + * Returns true if the iteration has more elements. + * + * @return true if the iterator has more elements. + */ + public boolean hasNext() { + return sheetIterator.hasNext(); + } + + /** + * Returns input stream of the next sheet in the iteration + * + * @return input stream of the next sheet in the iteration + */ + public InputStream next() { + ctSheet = sheetIterator.next(); + + String sheetId = ctSheet.getId(); + try { + PackagePart sheetPkg = sheetMap.get(sheetId); + return sheetPkg.getInputStream(); + } catch(IOException e) { + throw new POIXMLException(e); + } + } + + /** + * Returns name of the current sheet + * + * @return name of the current sheet + */ + public String getSheetName() { + return ctSheet.getName(); + } + + public void remove() { + throw new IllegalStateException("Not supported"); + } + } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java index 8ba59de43..5a552df4f 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFReader.java @@ -109,4 +109,30 @@ public class TestXSSFReader extends TestCase { } assertEquals(3, count); } + + /** + * Check that the sheet iterator returns sheets in the logical order + * (as they are defined in the workbook.xml) + */ + public void testOrderOfSheets() throws Exception { + File f = new File(dirName, "reordered_sheets.xlsx"); + Package pkg = Package.open(f.toString()); + + XSSFReader r = new XSSFReader(pkg); + + String[] sheetNames = {"Sheet4", "Sheet2", "Sheet3", "Sheet1"}; + XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.getSheetsData(); + + int count = 0; + while(it.hasNext()) { + InputStream inp = it.next(); + assertNotNull(inp); + inp.close(); + + assertEquals(sheetNames[count], it.getSheetName()); + count++; + } + assertEquals(4, count); + + } } diff --git a/src/testcases/org/apache/poi/hssf/data/reordered_sheets.xlsx b/src/testcases/org/apache/poi/hssf/data/reordered_sheets.xlsx new file mode 100755 index 0000000000000000000000000000000000000000..f46f59b032e8598142136b07ac8789e31ecaa2d7 GIT binary patch literal 8731 zcmeHMWmHsa+a9_lq`Mm=h7OUTVd$2S?(US5?k-WfM7l$Ul#rAh2NVTV8l?rk!S_9{ z=Wu-2_y7COUTg2YX3f5@wfB8p&;8ub^{6W&Ark^n0q6h#fDWJ|*va98006L}0swab ziwF-vF3z4-&Yq^)ey&y?#@xP6jG6b9_bp!@7BRt?xV5T2n(ZI(wC8x%~S$4S3AfEZI zIb6VJBH~0&8DCMvRR1Vkz)QA}iIruO$ju68Q8uB^hrry3thcyw3JniaIYd9_1qzI3 z6-2ke%#5q-{#-yJ1k#__BFJAnvT>?!pG1WXTk#4w{Lr@zYhS7~u7r)S7w*WNB;vjO zk&y8^4>}j~#POihQ$3a$;`b@Nz4)xqq#H5AKP&=Iz|p6*94)+9ATa5(3H20i`=xVt z+}PuykEh~a#@+v!@fM#t2_J3PLwgEcpidWHofXgv zOFLB*I_b2jdl`3WniGl{rB03`kiwKKpwq{jllLF4>~7d;(@>hJR0;zy`*+}jCb&IeYy>iTy!uxOHtFazZTU(*r&ulACyC=lg z$|F92ElV=&=Gtc@BK^eJ}&MK<}NM{H?z&ZrH_0g zefYEgzmKl;F?)E4mOXiL6`mc$1y!8Yq_@$+Tx$7(h92;Pw#5=S8+F~&2VO@F>*R%@ zTcg4P+TxpFLEbgK^707+n5nF&q0Fta7VIdsQRR67cmvCX9nGjj*a~%HFKb&S7wwO; zDxCcZOa_y~a@!v#ryi1vfm3#-PZjNq%a^iVrgl#xbx7w79msX8k|#g4iw!F5-H9WL z?w<bsd)&z8sqlxb(_K2!d?Va>!% zN8~LgvOZ=N%0e&0#3MD@!)=S#J-<~Jiy<%61Fn{OcooHkKLJD-awBF_$sOB~Gi!?bK zAlX2A2zPw4f7*2IS|Lo9EzUi^j2Q@9qh9dI zaHM==Q>tAj%0}O1x8j!&KtsqBD|Ss^f|fE(s5RSH8wL|l6RVW=AxyGOKcUacq^(Bl zY;vEBV-S^%2}#{czm|b)`sv%SS$uRA%5F@Z>c9dkjF@pX^Ek;F#9Gr#8id_dgf`D> zo?=$cjFcXux@(=qtFAHL*`6i}vSCa7qFN-@O#`teT$&?hfq1j?)?uX}N8Y#felpZ1 z-f6pzKbtozA| zfk2BeFAqNrDz@F(kk|BLr`r9!l)9Q_-cse74-kT^fwJYo{flFS^i|G=?z$#f>ouOE zzS&f|2luH`JCMT7CB`D1{5`dB42;|={u~OVnCOnkD3sS_xjmOWzACS*dCL~@ zcjVV9s|zVh50kvbN_}IBKs9$VO~0H*m(d98wTO4R5LS0lYbQ;z9}lsZY1n`b-i1at z2IuDVeeXc7XRvl>0e%YOA<-Zfq6OkV>-C*)Kiok3T_TlJC>k{2N#ueH{l6rV?`I-Y zAUCs3NW=y7SxEY{ z;fd-QazLGS<+K;O`d9J~@Tny?SCm0lb0tRAZp@}{=rStQt44Xz%` z0MwWC>iIEQ2hyu*J*$+}&vjYuuDAuF!kR)eyCf$YwH^@V^rxhpr6cE$?U<`%~% z0mX|8Y#mrFnNbNizNk;qi-G*-daKI&J9SzL#-&Fq>8u))hgV_Ffm>K4-F_kY4DwsW(f+D(64&?S(J>`D`CGI$p8#~ieC-vzWelT=43>| z1uoK-A;)5&sMn)@Y_~_2&(DIY11871X0+oXv%@>yCy|j@Nsa{DkzUG<7sVm1@e-R< zg_Xl;2#FPE{zCp9Bc5(FmiRydNe`cOVqna-aF1oxB~eFxEDd8%STfvsSxVXD+2+y+ z?by2yuU`ksA$G@Cj|-B$DO_W~nXF)0CLiS_L5g7$ecSkYBH2W6pWV2)#s<~?t72Nf z#Jd+Fg9sO2yxuG!fO?%r49|&a>w$lw;m2=jIJOboE`*nparjC2ud?D{YvyicspaWz z=WOF~LxD=-159}BmpuvdL2qoWB$8oNBne}gLW=Ni3boec*P4b>piHj^JVRS;N)oOT zv%V9<`J$SfP&4aXN)Kyu)~y+7aINyhg1%{ACUTyOB+@|JE`|%6e%8cc7d5`18c3J1 zp6dQxlgCs*jVx#qsc1k$$g6v98Vzx-G-p5>$-c}r?_l4`Mp&3(cNQoY$A9j0wY_2E z<@eD9{=$E+l+VKlUKYa#xF!69asK^Do_>y2Kfk3!3u6BcULHvfaive>IYswl z=#tZ=TBfQUM@%5LmL&%by9+j!Vks&GGIYdxW0;}nDl zZ!4W`)?J?)$I6f=M257a7lRM$J1#UC$ss*dHB1nKY$ZR&uaDU?a*K~dVNome&zk7` z!CeVYm4cd8_+Le-Pc>Nnh>is$bh8~9`!^X(33ykwLqi^(k&Ngrps0fD)yXhBABtw6{e5>>x> z_!^dNRo13f)LAjSvkTP;%f-}{r7HTcYib%)R)y%O>fw4_7YK&q9PoynNs%WWh?P+xXR-j+XnA*bN&WaYrjl1%*FY&asair(zCUjQgrAujDK2Eze)<84u>MGvYs}lg!L(+j*kI zl$R0v-Hdc>Oq`B)9NZRN7p8}=J%mVu0E^a{ubQ)F>H{9D$1~6d1n?C~J^+Ddprg?z zq+LTwUlE?ggffHkiBQD~=7G`^R#_OH(FRZ?NsC}0BW`*WX(lG~vdP?!uF?59%oP<9 z-ez+rGvn1CGhSc94m{uN6GB*R47U~@>Uq1LqxQb#d%3XX70{etfbeJ)4P3)1n|pL6 zSCcl)8M6J6Z^yY=qo(r|@$!f%=iyV;@@XZjdKsM8lJi;n6m!-=0dF7_$%8_f`$rSr ztx(T>RLg@mOo7)*)doHXsDymvYFSgug_prf5>YC`P_8E~fWYG1J`&dGNbwF}97fVy zm44iQ5cfG?N-B~lAtjf^GAt;1jXO~;%=`_&g}^Ag9@Jw*KGT)_9R;inz2Jc)<74^P z7CgTGdZzXL>si{zsL4c{E6*RFrw{sRxVUUSa3_YZ^<}LFA8#(u6yGlh{&sjkvhHgz zaDCp-^6JMAKM_1WTe|7Ed(VCxZ;!;)N?u-e4m0gLh04pGCdCe`~I5o@cqJ8lU+dQu0hNG8uB$5;yBc2?y^_wulqEGjQ_E7RD1-S*7+8{tX{C=_)x<>qQJA3$^n-J)`*eSv= z2P`OyU>H_T5+(+8l`%w)F37>yQD%LsdYNGR!)GvCqhd%9b%p3-ODoPpEd!ougWGu-AtGF0Dm#dl>6=Hs;xu(h>YWhCj)Y)q(M(cR%!7gYZxwKxNO7{Mo~?jh%_ zG|QsA$T<{5`gUs^A>~t48Dqq7Q=3?u)T2gJjF*x{?35A0q12rlcJ!etif9B(Mtw^Y z*jY34zMp3^B*+B6=RoF>V&@~Lp#`K-q<#?SaInqFe$XDpp(q_b9BijLbG zR`7dNlTE8sdbF*xhvR1HghSrtPCcRhY}`8v7A|s_cNK)h$2Y@vQ1jadeLv6}GlImK zE0da5#w4rASgh*JxshnSl>$p9{mS)gdTUBYdJtM0g)3jb>l87eeqIZh#&467$N7#e21saG>ZK*e;I)@i;C>jJ=ct~8mF>%Yx&H^2Z&?u zw}q`0qi5Z#wSZ%mSVDgJV~Npln&}Aka9w zqsWPxLa9mEKdC=SO+F7oNC>Z_!sBwK!kWd?c75h}h2>sx4b5)^su=cHs!>e=FfcDt zFj43()H7ASKb3xwxoe!ssEm;l%^uBhm*Y5^l0fjSKwagDH|`U>uVuLab6bxu4g0!N7 zvHMVq&QqPllev?T3<(bf&anskt`%Rr|4h}k_KcSa%mcAu3N1XM%oY#% z)Jpkgrsmae8xX33I7eZ419AaB|H}&F=e!lK0ak;rFuEBIXy{k%I_V^V%}?=WkowdVnE^G zO#|<|JIlhtteig{11p@s2^>|eNTQO_zD>DS*Hd}CQaM6${<9B8EgtsvW6$sb-~g5GwJ*@o`@hY0!!vw-9(bnN;p;hkxFu%kVxjKt;_7kF!o}U{ zraho7`o{@_XATgNqM_DLOZa3J`~w$fkL!J=h$2+N_P8V_Mu2=3{hil$tk7U5b$E!tfC^bq_S?K18uk7n=!>p>{RNZ!0uXyW_nvaYVz zPppc(_CFh9akK6#GLlV3a7{@fkMSIvP5V$i|l2> zr9A0W6Xk#!LP$zJ^(br@SbPel=(W4H>3=?+Z}_A*f&b~+#FA$G+9ye0 z0kY1QV;zHd3>)RAM<1^2NCkAtRx<$F8=5>HLZap~L<&eWu|?#FED1&&$K7;Ih+BQ; zh%+wTHQm!@bY#&fCLT_nPp`fLntVQW5IdrAd#agAV3@*Gyx`cLk3Shgdb+Te`FrOz zq6iGjg3HVt-lgLGS!QOguK%d)MqmFr^3x#BP#!#)J(NRS#64cC1y;;(b9G@HcUui4 z?^J%}8cupbMOUFPSnWNgxt6z#S)ZHUk;PNDm$Zoh1s&*$4qiU`W-!6$x|}t%gLPcj z*?Bd}$GEO!)g2y2g|L*+bAU(xmHXv@EsKhF>~!9`n;{IUDjdw^mK78 z>qYR?BNUoA`e~;JuVg^-Iarb=u{FBH^KAOMqx9&(cF_UqutKNw7*<*3YGUA7cHvoC zq7$TH6|=CPO~1NGBauMYhJrZA2}v_uO;XYVM~c;#5Bp6IWx#N=!Kjss!r zZ2DbngI?1)Ia&X3UlHbAGSn3W8|VSf^^aj6=AF_((jfMrVAI^BQjGJ*bq=e@>xp)1 zn_r{cCQtFauXOSAH~5`mooeXQ?MhfKXYfw!fYaV|TZZit=`52Dg`MD}{TQ(UR^l&|TCCrq z{4^GCquh4FexV4$?TdeZ$e-Wkr!RIJ<+hRY3#I$cPn5seI=2yS`zF5-lHry#{5?GW z5V2GxYY7{WieumgyG&KD>kaO%8rGP`6S3+6w%_00177!Tq4yjlu2k kzn1tvhi|~mhyR5CyWp!UqrmlgV=>_XG~tqOq`vv}Kl*UM3IG5A literal 0 HcmV?d00001