From 938bb91c51b0380ea9ed0dcb76a92cfe7b822f30 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 9 Jun 2017 12:47:03 +0000 Subject: [PATCH] Bug 60374: Add test for ArrayIndexOutOfBounds on old Word Documents due to Non-Unicode git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1798200 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/TestAllFiles.java | 5 +++-- .../hwpf/extractor/TestWordExtractorBugs.java | 20 ++++++++++++++++-- ...n.orthodox.www_divenbog_APRIL_30-APRIL.DOC | Bin 0 -> 23041 bytes 3 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 test-data/document/cn.orthodox.www_divenbog_APRIL_30-APRIL.DOC diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 3dafca291..f3d79eb17 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -230,10 +230,10 @@ public class TestAllFiles { FILE_PASSWORD = Collections.unmodifiableMap(passmap); } - private static final Set unmodifiableHashSet(String... a) { + private static Set unmodifiableHashSet(String... a) { return Collections.unmodifiableSet(hashSet(a)); } - private static final Set hashSet(String... a) { + private static Set hashSet(String... a) { return new HashSet(Arrays.asList(a)); } @@ -249,6 +249,7 @@ public class TestAllFiles { "document/Bug60936.doc", "document/Bug60942.doc", "document/Bug60942b.doc", + "document/cn.orthodox.www_divenbog_APRIL_30-APRIL.DOC", "hpsf/TestMickey.doc", "document/52117.doc", "hpsf/TestInvertedClassID.doc", diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java index fc1011470..af7c8c167 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java @@ -17,11 +17,16 @@ package org.apache.poi.hwpf.extractor; +import org.apache.poi.POIDataSamples; +import org.apache.poi.POITextExtractor; +import org.apache.poi.extractor.OLE2ExtractorFactory; +import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.junit.Test; + import java.io.IOException; import java.io.InputStream; -import org.apache.poi.POIDataSamples; -import org.junit.Test; +import static org.junit.Assert.assertNotNull; /** * Tests for bugs with the WordExtractor @@ -52,4 +57,15 @@ public final class TestWordExtractorBugs { extractor.getText(); extractor.close(); } + + @Test + public void testBug60374() throws Exception { + NPOIFSFileSystem fs = new NPOIFSFileSystem(SAMPLES.openResourceAsStream("cn.orthodox.www_divenbog_APRIL_30-APRIL.DOC")); + final POITextExtractor extractor = OLE2ExtractorFactory.createExtractor(fs); + + // Check it gives text without error + assertNotNull(extractor.getText()); + + extractor.close(); + } } diff --git a/test-data/document/cn.orthodox.www_divenbog_APRIL_30-APRIL.DOC b/test-data/document/cn.orthodox.www_divenbog_APRIL_30-APRIL.DOC new file mode 100644 index 0000000000000000000000000000000000000000..5a9f0d24c0b64b488539a31945e8d814e2b54966 GIT binary patch literal 23041 zcmeHPYiwM{b)F?fJuK3)o=$Do87XQ^#pIThtY_+gG)ak+#Yec56(_Q#%iY=CYu&wf zd+#o37KNdrNLrwFo3sH^yJ>9)b{Z#bfwpPu0s$g4fBVp{6b;fs{nz{{93e&36c+n^ z^H}auB(<>=H?sB;XV1NJ&zw1P=FB-WcV>9`pSE6k{m&oxcd_jHglHGn7dMIZb?GVG z?{v@i3GpG^Gwu4~;v$oG0dU*8?jQ}k3t6|=Hbj$(4glMH17IUy6W|`eWVBJOOS$rio5}>D&=Q_O>9NR(Vc@K& zLK3C*&@7(^Z29Yuq1p0D`_j*w5=^ZX%C3kh$PkI3*`hVwE|E&n2UF{-m4E(EexUX4 zaD4&Jf3rC#*UkDr`+Mv9;AkKPFWwFZ&l)Z7yAQ!0#{Fy@B~iX2&qQ%f9!mGg;V4@z zsj#Bs!gz-1ND_=5yidFL=|kd2NB`oNk-Wxr9X+=F+>HdZ%PXIK_3Hmj7VrDU7dDDV zANk9F=|kG*Vd#uMj8DH0pxu89K-;|nV9Z*F%#2mtz>Hg`fEl;Sz>IZY1g5la12eXL zADA(W%*cB$Fyr1RFymUlLT++BjQ1S>3>`arW^Cfjnl4 z`lKA3njSkPW%pCESN0#+XDNgr4P_e)bFrG2<7s))3!|j=o;S_V3~0isPpyN$ z0206drV_oEHc@@>e03Kb!u=TRIX_kRM%=HJRiCoL{R6^ybBmt?=Cu~4Zr-qT&0x0O zpMjp((SHX<+mGkBEuZ&_*F9)#YdtWpdi)5Udy!7N`+>!8qFlDikAT_Uqwwz|z?5gD z%jv<%M@#KQG)&zf`SRQ@hr_M}_v|=3?9y+>$I&gv%lRySrlwZ9UO)@5H@yhVnS#r= z8g}J6etbTKd@g;ZGHyMsYlY6()wOA`93_=Nq4Skg(jzlbD5Jdmw3<~prDXS`^0CME zrMos|JU^_c99&}PRU!~R;U~qYOxlrZtbC88*|a=xK{(}d}p=;9&N*j&9e@%_{$S714hr;#mcf_dlyqCzV7X(o9?1GG{ zu{^~%bW$bN1j5HYq|HaM^eA@(Mae|PbADFIN>or_7t49SQj|qMk+X5cgdSZ=O`fP2 z>I6s{1yRBG!n7Q#=z=Q|-;|^m=DBVAjzvV ziXE#t<*{O;kZstX0|z<2kbotugO6m1Kh^!&IAP2uf$ zR7C^Nc(YJQx6MBjdts9I3)NV2*w>?Ls@Ul{0H+`(B;}=~7ja zr3Kk`(#@JF`>>@RD#6PZQ5kT~l+i6*F-8pmR0tWhB8w;yb%<4^8i&XM&7M@K0(1dG z9)%s0RR~Rzcs2@hxUJ~=N>r@6Oo=cLik9)_d>V%pZ9a;FoGDJXK1q<4OGsek&c=Qj z0w{_C9YAwg%7Uk`3{Oi){L zQK6e_4%KiLJy_+_at7Q_s`*3~y_^gq)4<-WANUp2F#^wgwF1ip*^)iy{3MI2B}jup zXzL|(7SI>*Q}kmibvaShT+~}G`T@pkWg0%|lPJ!r7!RW=E}=(+bY}@|8TNs4=A&wm zlW0|tz?VhRlPIuUwf0uio|Xp=kvy|esO4mz^5TLX4B}2Vj_s1RS*GHs1XbqbI2@Dj zp&O!|ps;LIQl>c8RkxPWPEFiIyDHP?T-huysZ`Ng6*LmGR)&8lM#e`(Avzv?G3sFS zO;a@DXx?F+3ZS=AqJlgqv+(e;tm35V<&fJNP&t~1Lhx`Rf*nD|S=cHRMlTXY7hp%B zEO`Y#3nR;BRhb2A*YJc_uK0;@8J4*xD{)n!<$}O*wPG3fXzfEQ#zWg`dop&iBo~?w z-gdA6t1ZK~Ki1ca2*kR2ag3OvZKmeZ7_?05vteP_tS^ak%BvL9HgxFNM(0{W52LxF z_UK-05lshAYIEC0ulQlrDcI9~71q?)@M$@@gm5XmS}0e->C>e>`eodt$$ z1*rREq|Jn0KY`Y=7wGj#z%JkwG>r*-T6*m+bPKvxpQByr9LNs8Z`;#!eq*AB9UQif zuGV0gXpJ;2&(K97WkJ$CqsxI237pXEgz9;ft7$W);kXy`wRv)-{Av z`U1=dV~N4wPhon&)J$s>e7~Ee1;k9fKm-EQ2WPv5R|%TwYx2&3e^@ZJ3nZAZ;G+<0Z>sBf(z;PzOVtwn87C z7Xr=GeQCd%4HSlI?3WbO^3r|HLoZ=p)I$0QBlP*`5D}`5@eQqtuFuaF;jLB9`c7SB zIrbKm^;v;w?q%F_7!h31MQWk7EG&#QOM|^)&9r7pe(XU=GhiyKsEhzm2%^}}qfolW zw8T&ufsE0n;+H^a9lVpmn22k248S;Ufl3)}Ouy1qM9_&%z{ent!L1y?xQz{3BY_u1 z0n}v9a)Hhelnuw2BM#f7$_x>qR#OhE&+!L^oxy#sZt%o++S*pIG2NdbC}KYxq|eTc z-bZur@_B@L8U(x(Yyx8kv~-9jA|vTB5QB^!IC!vONa(b! zW-I7W44x<>)JB9>#=uj@PxUq2Cspj(-W0}F(3cj>v zH*u9=MVqe{00U-pbZrOFFk_UPGL1JoV>4Ev$6zkjWiUj-^p2(hKLQtTZNg}B0!{;C z*o1REO!Io=P*g& zEnmj4+Rs)o3WvQ$c!Gp5h%tN&oNP)#bBA5kt0 zT=CxYLP*w|Q80#aQ~-K&88RI9m1b3J^9*7l?S%yu)M$<%+RQl;*P%18EN{WZU=}uk znc18TI&Q#P7cb_4PZevy&djV~y7-61Q zn`v6Zlrr)zyWcjpFr`_>%a;Bl^SWt$$|vndQ(SGJ4gkBs{(LH-^=(JXWD% ztb}=3lk#!z*2-3@L^nQ~*RO`GM6*1NpVKcj@lAbszSW+8((+g1^Ip^M=<+YU@$_%~ z&A+X=Mj#$udmW*@3B@%-GtdMdeq-mF%#G_(OPQM~i+pH;`u7E|eIlw<+0qVa2J=o+i1o%C`w*h|u#s3vx2iAqSHnbb??vLyLuH9+1zw6{} ztENQ9>0ir!NVEg69BSKtC-5%7Ix@MkgsGTIWC!OKiy2VSo zzqbCG*lgZyswZ*pY`%{Aca}9tKHlbd-uU#L4fXF{>TpjDl6?TI1wOB%ou03QYmMT; zdtm|_ardNN85aVJZUaADYy)0D5ykkNt-WNs@XQkNG7anzg9BF=Ul)7Cu3?q;ssTRC zdiYKkd*z}$9$}9_bVT0o=v@igkRO$fZk4h z7bOYtx2b=&*gRuFdX*~H%KI6XU0htMF*a~q#9!UjFLv(wJP2m+Z6c8q*flX7VX`1P zk=iCQ3neTfiOnA+$slHnfskSy)cBilf0v=FqUOEx^b?;Wkgu`Cx)N}^PW*afLc`PE zcCPhrUtuj7$;%CO`EvuO_xoWQDfYs<)+EWc8l1-%9xme(aMRT41Q(+W%Xx%)8*ewg0!d{l8ds zJ%TIGn1*N%j;oQ8>n06=b{}s)A|NHuPUtMb-F@49)u4ZTk$mGS( zomh+4?f(9EV2#3>k$H{mckU|&1y-Jz;I3xpn(Y@;;xMNA1T$ab9j~3hn3TPIPiix_ zx#L3-z7gT+fOrvm&4u{ww%_|q1D#^SRYi_T{( ze%9i17ViS?z^3-wG|Ro;&T{ovAYH?ypAYIRcd3T0yG9Kxo&VUe;`xt6Hq!Hn-HR7@ z^{;wi()S!*UIxAl{F*%_uomX|kF-56@jcM<9EE$X$8a3t6QH{Xc+_I|{w+)_`;k6v z`4BTM7wvPi_?w{Tcc8zv`RXy}rvHlcZvnrI$vp9C1SZC&@1cAxw>0NJ5+oj~_o1NS z<@Dg>SvvpWQXDq)OTRANVV1zlJv+`0yL3af^y@U_-miw2^I85(O|5i&6D`Q|AAbi- zOL6&D!>)YCkI$!&&!w+a#;vEd&VRJdf0!*JGp+LRj|2NylK;K9WXE_85_g9{v7+~C5-c|iTn@ppfGO;_~lJMX-6^_{Cfya`(eG^uJz{71uw#pWsgw}*); z&sXw4l0G8@%9lcd!?qMS9FHRI(C)+;H5|n(e!HEbhG6+OJP_;c9jd#rw2h|DOK?uaer_ literal 0 HcmV?d00001