From 3f91e4438031548bcbd0d492a2119d746503c5b5 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 21 Oct 2016 16:31:51 +0000 Subject: [PATCH] Bug 60289: Fix handling of unicode escapes with lowercase hex-chars git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1766065 13f79535-47bb-0310-9956-ffa450edef68 --- .../xssf/usermodel/XSSFRichTextString.java | 9 +++++-- .../usermodel/TestXSSFRichTextString.java | 22 ++++++++++++------ test-data/spreadsheet/60289.xlsx | Bin 0 -> 8570 bytes 3 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 test-data/spreadsheet/60289.xlsx diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java index 980f1b7ad..e93f0fa4a 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java @@ -75,7 +75,7 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXstring; * */ public class XSSFRichTextString implements RichTextString { - private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-F]{4})_"); + private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-Fa-f]{4})_"); private CTRst st; private StylesTable styles; @@ -244,12 +244,13 @@ public class XSSFRichTextString implements RichTextString { if(ctFont.sizeOfShadowArray() > 0) pr.addNewShadow().setVal(ctFont.getShadowArray(0).getVal()); if(ctFont.sizeOfStrikeArray() > 0) pr.addNewStrike().setVal(ctFont.getStrikeArray(0).getVal()); } - + /** * Does this string have any explicit formatting applied, or is * it just text in the default style? */ public boolean hasFormatting() { + //noinspection deprecation - for performance reasons! CTRElt[] rs = st.getRArray(); if (rs == null || rs.length == 0) { return false; @@ -311,6 +312,7 @@ public class XSSFRichTextString implements RichTextString { return utfDecode(st.getT()); } StringBuilder buf = new StringBuilder(); + //noinspection deprecation - for performance reasons! for(CTRElt r : st.getRArray()){ buf.append(r.getT()); } @@ -381,6 +383,7 @@ public class XSSFRichTextString implements RichTextString { public XSSFFont getFontAtIndex( int index ) { final ThemesTable themes = getThemesTable(); int pos = 0; + //noinspection deprecation - for performance reasons! for(CTRElt r : st.getRArray()){ final int length = r.getT().length(); if(index >= pos && index < pos + length) { @@ -406,6 +409,7 @@ public class XSSFRichTextString implements RichTextString { protected void setStylesTableReference(StylesTable tbl){ styles = tbl; if(st.sizeOfRArray() > 0) { + //noinspection deprecation - for performance reasons! for (CTRElt r : st.getRArray()) { CTRPrElt pr = r.getRPr(); if(pr != null && pr.sizeOfRFontArray() > 0){ @@ -556,6 +560,7 @@ public class XSSFRichTextString implements RichTextString { TreeMap getFormatMap(CTRst entry){ int length = 0; TreeMap formats = new TreeMap(); + //noinspection deprecation - for performance reasons! for (CTRElt r : entry.getRArray()) { String txt = r.getT(); CTRPrElt fmt = r.getRPr(); diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java index 7d45351fb..026036548 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java @@ -229,13 +229,18 @@ public final class TestXSSFRichTextString extends TestCase { /** * test that unicode representation_ xHHHH_ is properly processed */ - public void testUtfDecode() { + public void testUtfDecode() throws IOException { CTRst st = CTRst.Factory.newInstance(); st.setT("abc_x000D_2ef_x000D_"); XSSFRichTextString rt = new XSSFRichTextString(st); //_x000D_ is converted into carriage return assertEquals("abc\r2ef\r", rt.getString()); - + + // Test Lowercase case + CTRst st2 = CTRst.Factory.newInstance(); + st2.setT("abc_x000d_2ef_x000d_"); + XSSFRichTextString rt2 = new XSSFRichTextString(st2); + assertEquals("abc\r2ef\r", rt2.getString()); } public void testApplyFont_lowlevel(){ @@ -382,6 +387,7 @@ public final class TestXSSFRichTextString extends TestCase { public void testLineBreaks_bug48877() throws IOException{ XSSFFont font = new XSSFFont(); + //noinspection deprecation font.setBoldweight(XSSFFont.BOLDWEIGHT_BOLD); font.setFontHeightInPoints((short) 14); XSSFRichTextString str; @@ -423,8 +429,7 @@ public final class TestXSSFRichTextString extends TestCase { str.applyFont(0, 4, font); t1 = str.getCTRst().getRArray(0).xgetT(); t2 = str.getCTRst().getRArray(1).xgetT(); - // YK: don't know why, but XmlBeans converts leading tab characters to spaces - //assertEquals("Tab\t", t1.xmlText()); + assertEquals("Tab\t", t1.xmlText()); assertEquals("separated\n", t2.xmlText()); str = new XSSFRichTextString("\n\n\nNew Line\n\n"); @@ -439,8 +444,6 @@ public final class TestXSSFRichTextString extends TestCase { assertEquals("\n\n", t3.xmlText()); } - - @Test public void testBug56511() { XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("56511.xlsx"); for (Sheet sheet : wb) { @@ -470,7 +473,6 @@ public final class TestXSSFRichTextString extends TestCase { } } - @Test public void testBug56511_values() { XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("56511.xlsx"); Sheet sheet = wb.getSheetAt(0); @@ -533,4 +535,10 @@ public final class TestXSSFRichTextString extends TestCase { assertEquals(font, rts.getFontAtIndex(s2-1)); assertEquals("", rts.getFontAtIndex(s3-1).toString()); } + + public void test60289UtfDecode() throws IOException { + XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("60289.xlsx"); + assertEquals("Rich Text\r\nTest", wb.getSheetAt(0).getRow(1).getCell(1).getRichStringCellValue().getString()); + wb.close(); + } } diff --git a/test-data/spreadsheet/60289.xlsx b/test-data/spreadsheet/60289.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2d1b508177bbdac9148389a9cbebd810d76bff39 GIT binary patch literal 8570 zcmeHM1y@|j)@_0l2ohX_ySux)OK=Ur-62Q=L4pN`PJ$-5L(nu%aCZq3vcfLEO?6c4Jp>yjQLv0unE-Gv{UUs9o zPB25#cmvE@-(Hq|p~=DN75+H?ByqTCRzIbO4S=r$sj_ua`WEW(men9M6?1K;=y*Iv zro(L@JdwE?Ay}Ar*=^V4QTo@nymOu>au_n@?=xaEOo6l%D?}g#jL08NRHAw!Pk{3+ zwp0lDg$h%Gs{-d;^=GR2v<&RCX{{xu2Xv)JW*D3h^tB#6LMjv>t3*u^vCQR&`dD9k8?u{Ew@h`)P7LBH7V-S?(*xASjzdmW0l zd8v2{5Ia$P(uFILpRaOG+}z+3XA2}!Lun}KFpx+3S{dv7E`rbdZ#GUggA*ntq3Y-b z39e!hLGAA6Ndtb+i{f`MVnCtr0FA#y*XA{?HWDnlmSDm27|;f1=5FKY!Or$@f0p<8 ze^K?{8ck&r%P+g0;UXX1fLA0(f~*$eIptDT;v|3c_LHyasN*P}MKACM-3*vhgjS%> zD4$w_Im>za_Up9;lO3zw`ZVDp`yPyYnUsrYFh z#3xV{HyKF2OAbloDgm+dhKYV|op7(pIN|V|$S1C%{>(HeOcZP@Av%cr+(dG2j|6LY zd^-i2!pcfJm7&4(r0$4vP>SoCEe|}VSt3#EKQeiopV$8~u~@QnFifkXQ8>z0MDp5* z`ANgwaP4hN)Zz?-zjb}%=EQ6Ae!m`md*ZFot|}|?k+xw7Eh`VLd)C>t_lL`ORDw4t z=feNU_4ud-*EN`h6~=X901BM1Bm3`k_i=G|uyk>8cvwsRMSX+^>cisV|MyjvH2Ctj zOboma&j=FrMk{>VKmS&$q2~@jnLyE<7Gaw_6Jqu)Y*|8qZyu!rY1xCu$wz$Sy&CT4 zOEu}Ft@r(gW|tMyN4h~GZDE$26@1}71r}71Cf8S0cLiVtp>7ijgF^g1Vxgp?$o_`> zao6wgpLy&9N#hF$pQHDZt0*@OoD#%Ah2M+p>)aSgFU^j=FJYuPfjF=7v=#ceBv1=} zekGA+uRx%Y`kM81WJeCI{FlM2VbSAc5K!+Ysher6!;>CSvi2YmS9zM!zM38Zb$lX` zo6Qq~f69fODbF}GVLq3K@dFd)co;wa%#1j#apy1&+`H5Y%3~*4T>jUIJGNX;Ya!qr z4`c#+7FK&@*N_fn43c&T@M0-obXSnSOvs7N%*d?2KJ9B!oqL5MVXYq`Z(~9;957ca zJ4-m?`;ep6D}shcuJzWMglq=Gz&w6n_pQ8idSU-J-EAj~ugfy%3WpJT4LI?I^Ch@A zZ_HWJP+YQnOr@17lW_*h7x6nKlO_bEmcb_Ay5SfVO;|Pf5E9Z$z~dxJ`qmU&xas%D(a80%_e?aC2xQ z^Aap~5*&IJC;l3GICkvs}4+*~)3oRy?@gt{{Pn_d=W=fPf znM#WJQyFHyEZH_}sob1FQlBY`6=hs;*^DB`!a(ps)PlpCnH+m?3sErW0QsM0-y|*i z)E_qcobUhu#_zN5;pyjS83RjOZgcoD_%&eZCI!#Q8d-C(m8_FGMx$$(YGQGj3h~O5 zzSL8GXHcFuzBw^L6p))Mh^=j=T*oaYLxqTE`X)lLccDH$XF(#5$shH7UWjx!>y7y$@T?>TEQ zOpPMe)2acjBZh|0CqeD!asg9An1{pQ0PPbpJDv!V?pPMrnQV-S5S{FLDC?kH+q8U7 zR0F?i@cCWQJ?9kg=kgySlBqdzxu>m<#^CUDz$6Z3;sI0JKRSHsimAi02C% zCmZ(P?wk*(g*z?}1W=IP=@-#~)w6*E_zU9L*f?HomJpP6Q$ohLD&7(}wL z@d~c1*&-dT1;XLTJoc!?^BvvusB-w9cozyvHH3r86G$X8(@`>1A%W1<4Qj9Rh4APP z(o{5CZi}TR75>rPVic1CF9Lib@03Z%lhOLDhI~~PR&o@6EX>^eFzLx=qQRD|5=+Z& zf&i6hvr}WvEC>e&MJPFkRGXr!!t}9qdLL21El2ClluX;wcJf?k#8E#B2;ll4CMm5t zvG_ImlCZT`X%Ft*>rgt?96Tgp&}(*{D7N)3IEk#M<))ziloKHLQ>iAJdiEV2S!0D<{godgWgks{4J zv8V~tB?hraL2N$(V-k^g@kv?q)?q=>>uhi2!YqdYE;z;+)za<8#1pNFXNamgi#HtL zMC`}@m7oyl-nI7G-gU}G)aV>NHD92;j-`!pu48&H7-QBhH(H=R4%FA9Q%E{zC-$WrhLhSy=mqLFiNz-1 zQoxRkO$ozQ`jpAQS0Far{kx@JH<@{$L2pZ}RbPFZ;Uit{$4Y${%ZUP(huoyhq=^{} z1>09|6LBDintO9z2|vS>#WZ)wWyw^ORk;eN=@ao|z3mZ#Gt+m`E4&?ReF(e+m?!E6#m1FU;0e= z3a*cNIVc^cZ@HKtOqj=2#t0!Al!*#aW|&gDi+^$J)03h3v6l~N1#hFF5#zbGq0#|} z0bJ?|1$VZD+Z~IA8knv4u1qicc+Cdv?;n;Ji#gPr;2L~vYxb)Os@_e`PgOmM;NsKp zkaJd=q*tC}=?x;B+8=^T+KeiqiRi0q5^j<(uSG(w5X)mCjSvVWZ~1Od6{@0$j6-YO zIX{A)J~89FGnpz%#CMhno`HWoO-`}+%}X?g48xtvb`W*wn%kkmoSt$_I{%t6zp7<$Uh({YGU% zBP~MrDWPsS`D>!hDTH7!&hS4Yyys>7@}|OJ zz*;I$*VEfVW^tswsPOG5E_S$UV1%3_-4l)`vnZw=?jjs#dKK9*cGuLRa;KuD^&C2r z<41#Q2LiT=(Ub0_+B}2SkNN#_2F+K{)iHZ_3Bi9dtj;=@0YVX?ZBJ)XqUaee1_|JeN_bTOxvN5!b?lRDWL z02S>f2@R3*Mk7t-ELW;5?a(BRMj16Tnkkz3Df2}%DGuKhZ`G$uZ%iQ8UQt$o&6hQ` z-gh@3+cAbQRW;7l9OC33ZG6c0yCZ#c+zeYY7L#Mv7o)N)^STiKT9L(o*qEKyve~+$ zHIFjz8p8ClE-D#21sjc?SEuoqI#_px5~TA+f?Fw-)+Ug5#jBv2-?(qwq+C8q2LtNy zz>P?%=$%M;Ka-tA64?^HT@m6_$m+laiiVR#9Sc8!Yw5LdBkATPdj)y}rI-9C+`iGa(>qa!iquotGBmOXwuu=%%D26HVg|09h}Afc5tlzEGE~S?M(?BG7^55- zcQ?Cz>y)h-caGslD?*v>9RaRPp9(YLFh#WV;$xBG;iBK5P zm?hwW6QUvWnwsvkUnYro917iVYX~AALDuiEd^}I4@6E?VESBTuxoLB8XJgy%o~A|D zP)h$|!GH`jY)|~gW++$kH1(N~zMzt@>xo@_9~#%xUJU_}r)T=1vJIX`H2OTKstqN! zKQR~2o3E3K%X%A_n@|P&jA%hX#X=?RtvFZsU^c<`7JM-415M%|%)Az}znM+u&L7N> zuf&j(AlsYX*@8SlV1HsE<;-P#t%(>tmHd2r*>?NBvQ;Jr0?8XA#QQgc|q-Hc5QTaoGyO6G@m(`*~(1ZKAtuHfe~aA;uKY?C$Jw~ zt&5P_c@aO_j5c+Pf`d=3VkJz6DI1J1+ZddJy}69C2H(-;71YG!4|Ths9;}!tFliU! zUZo~2Hc{Jf)c@Jr=bv$kxPN-Lxi31F&^qHQaq z=^ZTHt&eI>4MAq%My*J9nkiXRiTU`8s)8>)jAIj$Ng;UbN3DwqWcHdB0B@q;;q)aD zT*IWDB;!zSDdsF}HR6_MoO%P~KaNBsRq_XNyqJGds#(xXW9S*!<)C{ zY6D`%jptCNX~)YmSO0N+rW~TuBUH-sU==O#^r=R=0->Gj1f$|DzKz~~y?d!DvY&RWJnZ5N z&t@(_)pegDZb8!YB?eJgpl`xs{^VYur|W|IBJ@3{ai=*rqc+=qDk3fdqxQ$G`yzJT zryov5*g&NtrxCt9xqL(;Zoz!gs>BP?WcydQ&^KJ9#Or%tls9aa~^cuiaj>IZ0jQ zL*wP|gL!7#uO;QeivcUD-@*&KiG!r2LHRf68hjzp1F{vw;vXZpxx(fIOB?i}?mIi= zHnq{{qr%JwjuEof{UeWsDJ-ywY94p8$r#8)9wb*OW8)`Lwy+$yKbR}fT8JOa4xJUU z59a2o{Ri_R9zGN>*M2OzQyj=n?fw?h;#Dtshq4b7I$Eg?lp@aWP+J%eY#-YF)kNRc zPcDMat<)|GCLoglieq2uvLBZ%?+^CYOC!n&pzT71$AOc|USSZ2@@2?xiI(J>o>vUBGO&DZ>vZgi^{)XobnvsdFyHJA5(lYhOqtw-az1qp}mr|eHf8W z@@rgsy@b~_`E_@M<)R~>S6gq+Z@dFoeRQU;9lo1H?YR77LXMS(YN)^tdO@&bHa1L# z`FBFLa&fo$UC#fX`U0CdKu)rzdM5`ia8>mdQ+c(qq6@3mk($9=1)!QSel$}@U|`%* zRQ}|?Z5iolc>C2H$fE<~F=<^#PS;mh5>;{rt)W_JlM zbA}D+FAm!n{jE%9B}HFYHQgd~uzoLXC^74|)koJzbH(Szn9>uH&^ep;*>Hp((Ar2O)r(?&)=(X$VQa{~{92&spxTGu0SQ&n!C?;))oBTmt zN}NiN(havKJlpH(W?$gb_7L?Org8yW`Tu^51##ouWWgxN2-6y{{!Bp& zSJ(eB@PT{3UpdK=F8Lf-z#q(z5Q%nvI!9o0k{d8tPSmqLDcO6pf+$S4D#+ak^ zf~NptFBrc{A{r7Wztqm_H5Dqq;nZuAe5?)w?^3#BxmYM*E2iLc^=| zk|0cLtM!t6Cr415T))K`IcBsF#tWB$(P z)CG^-Xc(U_U@4Ul*5hBwrN;{!8&41R-BDR4e!rG(sB;R%7ZW@ELZ1d>}u%tB9*T5BIRz# zy~o@OrDk8OPnEy@LAt{gLtKiac=Ddu$;fYq!xOq2s%wLmVOeeezEJupvR(>!X13^* zOI^8(cNDo(GIc;RM1Wi&wRrq`^2>->ci7npJPR&@4c(1aL=-Fb?6>u}1RTa3!)cyA zf{zR-sSV_FJ@T5a1O2d`v~aQjZb%b=#N82I-sb0hG} zy*~K}dO6{@=SuG?JyUn|=(&0AEBuVpIuvm9t{-;bSNF-tSc=W2mve7U3^SQ5qZ2=^ ze2Acc>hMi9!y~=?oUO%Ux9zEiToL;1e5vx zbA{yBaemd#{}|~dtc?GQlKw0BSLyc;@DA#4;6H@mU!lJWl7B$sAO8mZRi6BHfM1V? ze+(dj^PdijzmD*Nn_{|D<7J#7F0 literal 0 HcmV?d00001