From 9eb8fedf5dcb4011bd34e5a78003ff18934a03e7 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Thu, 28 Dec 2017 08:45:51 +0000 Subject: [PATCH] Fix bug 61787, which was introduced by bug 58067: Change how deleted content is detected to not incorrectly see too much text as deleted. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1819405 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/xwpf/usermodel/XWPFParagraph.java | 8 +++++++- .../poi/xwpf/usermodel/TestXWPFParagraph.java | 18 ++++++++++++++++-- test-data/document/61787.docx | Bin 0 -> 7047 bytes 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 test-data/document/61787.docx diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java index 989c93db5..2e7e8238d 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -148,6 +148,12 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para // This implementation does not preserve the tagging information buildRunsInOrderFromXml(o); } + if (o instanceof CTRunTrackChange) { + // add all the insertions as text + for (CTRunTrackChange change : ((CTRunTrackChange) o).getInsArray()) { + buildRunsInOrderFromXml(change); + } + } } c.dispose(); } @@ -189,7 +195,7 @@ public class XWPFParagraph implements IBodyElement, IRunBody, ISDTContents, Para if (run instanceof XWPFRun) { XWPFRun xRun = (XWPFRun) run; // don't include the text if reviewing is enabled and this is a deleted run - if (!xRun.getCTR().isSetRsidDel()) { + if (xRun.getCTR().getDelTextArray().length == 0) { out.append(xRun); } } else if (run instanceof XWPFSDT) { diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java index 718c73e70..30d9d7ef2 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java @@ -414,7 +414,9 @@ public final class TestXWPFParagraph { //CTMoveBookmarkImpl into ooxml-lite. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Tika-792.docx"); XWPFParagraph paragraph = doc.getParagraphs().get(0); - assertEquals("s", paragraph.getText()); + assertEquals("", paragraph.getText()); + paragraph = doc.getParagraphs().get(1); + assertEquals("b", paragraph.getText()); doc.close(); } @@ -612,7 +614,19 @@ public final class TestXWPFParagraph { } assertEquals("This is a test.\n\n\n\n3\n4\n5\n\n\n\nThis is a whole paragraph where one word is deleted.\n", str.toString()); } - + + @Test + public void test61787() throws IOException { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("61787.docx"); + + StringBuilder str = new StringBuilder(); + for(XWPFParagraph par : doc.getParagraphs()) { + str.append(par.getText()).append("\n"); + } + String s = str.toString(); + assertTrue("Having text: \n" + s + "\nTrimmed lenght: " + s.trim().length(), s.trim().length() > 0); + } + /** * Tests for numbered lists * diff --git a/test-data/document/61787.docx b/test-data/document/61787.docx new file mode 100644 index 0000000000000000000000000000000000000000..0c4c9353cffe3afbd212349f51f2d70eff192201 GIT binary patch literal 7047 zcma)B1ymf{vK`z94Gx17G`M>LL4pLEV8NZiWpE7y*WemlgS!O{?h*zG!3PKq354+B zKDoK~|M%9fvwE%WUcI~fRMoCNwOdUI37H6hj*bq9hsY`celvpmeGdm`vnTd$5EFA} zYkNyhD8%kCUBMp4MHH$!DN4EoBNvx?n<*Pi#1ZjcDiLXtfCGP>CN7CGCol5b5@cdN zF^OCp+1-30NI)OoulMfzZrByaLE8;D=(+GFA~!kdWgm}RTqwH?A24dCc<`i$6a&Kw znJb@*t5J>gYQ)2XBFDlEr6q!b1%uUx09B689G+2xPADIcT1}+BrO&Gppb7G{Bj`4) zb*1D8c8Q>z{ZM-(&$PiB_wiYTGS{gTq%KTuY&kt4w(Kiea<7Jv|$-vRE%j-+b2MTMq z03_1ewYui91^iV!QF#?&tS1Q9K z;8{G=e#_@_bi_Tu%&q)6tAP z@R1?FWJTo7n7!Mbu&*U-JFszdF4F4C7bg&ifovcW)P~4~kt`k|wib-&y`{ePj<2c5 z#TB)XI1KgW--dANi0N`oH5txN@nU`sBUc|0J!G8G;mumuxTJcBsx10-R|*0EV2bkJ zqWbSTW#M4&s%dOuXa0|{Dy#Hz;k{AqrlLE76|mz)q-(#$6gUV#dO^ipW8+;>lQ;?6 z5b#%HAQ~QyU_SERI7wW9;a0wvi%YcPY1Cz6CkQp=R8Fh8YAWs*kIY0%43?qiaVLs_ z2ny~Ca7;o;P+FWyx5>)eo2)5CUt{3dhij**vB&X!KUOfW2n045IL;EcFDo(La(5~x zh|ZgGnewOAKWDGs3;|;KCltIQN{I9$BjOuuN<4^L&?WVEk5VX^L?Q<(?4zc4CKE@@ z0j&K*pXJXT8`|zPZy`gPu_{IKiCLOL^o2;=Jq(-}suIYM#AxOEGl@IBfmv2@7JZos zP6*^3DIeQFOJw|1X6bJZ4dxhhKRWwa*5s@m@dRCZ%a$C;6DJ5P_s z-A^t&#hgT}c{<#g68H6;C5`ZVcIgz~68o0GsI&y}SL_r004DCdXGgs{L>x$Hy@JxTpO>UF zw~!U8pkBTJ4i!1G6l!Q(0E$bzpxM#Hb)J$wv19{)!=ZWe1v8KPYpfGH`%NQYC5N(58-PM%RAVZ2Ma~Z;aSm=$8vu-<}30~q(rwhva@v!NMGDx zYNW3W5@B%uA~VJPM7`WzfnEAr>>8rVm%76zpGr8t(U3kd_*6uz8Yo33e~7Tlf}WKB zJX0SDCOtDcGbRM-XH0h>0>c z%(+Ffmu#=KBH>&#SD+s(6Ji&{rqErl-y&Dm(4Bk!j&72`9Q=a+GVe`9v@2SXQofh6 zi0wP1!UY;la22Cx9~JG_q!Br7gYoN(lyQsK&XujTJy@w^L*wj9QCy6(@_CbD$4qZc z;u@mUPOl>pIL=y0iHiE3su}5h_&7Q&^W1E)5bO%RynI{J+-vB@3})-(mu~G#Vp!j5 zD2?5mEiYFlB^>_&CqD?wLSIq@gm=XDz3%5|<7fOb!k{;m0NrlQE5iY>pv6%b? zMNjc2p$(VndJ@$uMMyBn5~mt3*+z_>?Qw+{>O>hYP-Wdj;8Xtrq=lwepWUnZRXc#> zOqz4MOWjb6cckAaITAcpYZ+0M9X0it6`qsz{HNFt=O^X+bLrbFG+9S}+#)1T6n4Hy z@20tOqV4Jbs&aK_{+uy+BUS(7fZA7$ZEye7OQ?kAEI(#T)emE5#MH*-bLs3dNzFI& z;+O4C8)KHYgCn5x6|7&DZg=H!^~YlHQjJ+d=DPyUJG2LKPo|ru<{|+A*Y}n7|M77( zb1-#-nA^Mlv0BvMw}%N4`vv);^>nGzguB>Iqt2l6&Wx)B?%8u6zF#y%(iKS{)hJL@ zcV9L$*x*2$8*jn23-NRiE{Fq_pjqGFU`p~h`C#ovf@tNsIgczHigLc8O(k6JQggT zMnV$*65TgV8MOAcL9Lw+DaN^S58#=?QX4^$!W8F`CJnd4{t>IM z3`N^+*T))H;@t{L30-iBThhxqSpP_%zY1;-{9(71lW}+?*EIHp5i>V_-17W$^`7wX zho1{1!NK1h@Ekf2J#V+$_M7~CQBWKT+w!)u_EyeEI(UC=bYv)Wzb1^a^}$o=F_Ong z(*{=Qk8It9TFN`YUGSF1M1ymB?cc}Gn-WAGQON{ML4PE2C0mh&j2J>a3Zc;X*2=3z&dZ$5)|#&oMa*;79dzdsLxL@7l)&lY>u65w98-0bYU7FY%6*IgdG3*@CSEf=vqjk6*@xw2R>JDO6XbH{W^C{j;ia`8u+~al)_Tdk&UNKI#nA& z)rl?~uQV78aX587;*wbN30Tv@K;iUo-vpO6v-&>X__sgGXo#)i=Q0F91O4cJE-AKeR?gT4kBsaw#@k93Q#@zh8;^BAS#>+{ zCNU$TAYGZ9;{+hi2#~TSmO} za(El>SajIwB!fqYdi>T7B z8=FVN96oFFZ}CsalBdHZ_Fm=<*K?Yinxx>%BgmqYs!yNQjw!K1mMk5-mRgRjDL!PL zv?ZmN$-Nq&N?L+meABYSEIqHcogwvQv^ik0JSfnpi6r+59``!@GIC-l@cpzdTmASA zyHNI<0;9^x{fdT~X$=fd3_CP(W2)K5^247Ku^Pp923^NNLn=EgTQI>|mNnf_p|lhb z;qh&zb%(NwvGAQQnuoFXW$fhvePS^^6R6Doadg%4BpLS!(6H1^g7U;C<&l1>ary2> z&;@ge>xQkiyRVUe`3wdNi`xF9N3eA;U5|NW@?c1Ewp=yWhm>4RCDw)_sq3Ewdy8%* zMJ6~_No;n^GmRQVtLY`OIxU>?c=VU*Nu8(2FSl9KpP$;j;9(Ic#rsAQW4?;IJ`I-& znIQPCqo{`0ODJZ|q59-X>oH~}>8C`RjI~$qp*g+|VmhNR9--LS)Xh&M!r4r$5{)yK zpssodZ<&R;FkkK@uqRrf{HV5@vOe);w}4!na9Fa231z0TH0?4We7Q36nWAA3?~l{6 zp4DTaMm0NIXGP0Jr%}c=^uUoOu@G;Gyfg z4RX>ZjmumLI1C$%f36l9YNd4zT@(7wWlz9A!pMsn}hBCbN&2udi$}+ z3{LjXoYOHu7QD_z7I#->4ZB3C$ZB0NZG3i=J_UrlU0Th+t6rhe@lPvEH{p&1?73!A zkI-p_pHgJzCZbh>4#?OP88PGY4Ayv!4pv`tQTtd}(x#l{ffVGO@#pP{J{?_A& zzL4YTTy9GCX{%6OObQle%g9&z?9)|>zA?kfzPw*y8#Ws&Ph*AlU+fzuKkG1YU2#Ee znX1GrIfLSr#c@cx_gNKPCTE(F}cT?;gbh|^BANVGpF$%L~u;+;* zK09bFH)V2l`i=<|Mtg=@Td!=!=WZ5pWl@=F z;m>oPl$mT-E3x4Pc|%_vw!UUtS}L0wERj2^VaibQb1#%sa2sm#ILrQ^o<9N7jlMQi zFmlM7*V8wALfLuy0a<8pW}os`Kc3j0ivM*b@vz`4WGtmFhk9-k*OV5WnVYQua;J4d z$(0QMgxfdqBqTlY@n%SlQU{SFlL*P(0d?wjt0KXp)-{`-i$r1g%!qU+37zH9ftkZ6)iC%s;=YEIuc_ z#TTJpYW+yRx;M8P*g9LMRw~0TiNfORcp8p|h@wA|z}Gr{7n(s|s( z^YX5~KiUXJdY1OiWE!{ZaA0j$HzE8=>MLEwOMQBg!7XpE;`cqbe%A+m6O#Q$&!02n zw=4fhCUm1e%&e-}V>owiO*2GBl?R6o?`)?$@>*VhlrFR`xpa->`A8nEW$G-4P~Hq@ z2A2c4Bl+X{)c;{MuhV3e8jA%02x|Yg)jZ|DT6IHbb32zOzpv>zAI?-JDA^!!WB(xo zABtu(6eba<3NXe&(x|)Bro`TIP$;ov!4`;4gYa5Y3;ps{bK({qZuZS2Q z7ckY+U!Zov56>tb6Dw7^1x|K{U}wOsZ^OqGzpWcbRq%u?cGD27HEdf*IkceTvvj%A zda|{==!j_)UZlRLWjAzdwUBb_)oh!WT57;jyC6D|JeYJd4F=^2doi@|%J4>P_6zYs z;4iT=6X@!s*1)gEOD9nhmcCKFrW~K551-2+2=NMhWbWyUGu$O z^QnfDJh;mAO%~Pqy(3EQmBs(u_}u#v@DDHj;hE?OQD6@jHtH8b$FNVL$|C`IHq$7y zZn9f|mtTwU$DS%c&o9l40Daw7H&>P|ht%#&f>A@z!Q4_Cn%x5MfHmsQdh;rRf{qZQ z#ZlbHAPkXC8&#o5efMcI89EfF@Q+C@iyG;hA=(AQXs+fw*H*He=Cge0p7`W@MO96F zofO@YFQ(a0Yrr^h#u(pY4+p2@#h{)IyQi^)hT(LPCEUEtHzg%r4dp$ETO7C0cu9nZ znQ~Z7(AC_}gY{m+`Oh;YyuU*4>AlK!-`yE%dODiB7(6t24T*nBZ4NKL_8`*E)?@i4 zcfY4Jg1^wkx*OtUIXq0Or;RL=ul_W?A$YdB6E9}?t%J=yb!b4XJj1346Prmuz|Q8P z#*5`hOBy>5fdpwD9#D`RS^fha7l%m()GX(4@tRB@B^FnEM2PxwDOJ2W$uplYRdq49 zGi&TyeRv?Z<4{MK?r2W+n{lTTXzNj>EeeaUZNM9)$TZgME4pm18{Io&dy;hCRA)db zPpc^|1oD3W4Dhm46`D9^2K&-QX9Cj=R_4n}iHEaoGi_PL$tS}NWIwSN(_T>VbOn*< zt$4se4dm3qIEWz1U#O0=OwkVgwkWn?DHw8|>Tf51@LLEQhLUb6+)Hx7I4RNt$$?Kn zU8`Ry9{daV^=%w<@7vb5|G@km|GP^1pm6_b)AxD% z2mW`x`|t4IrMtgAzT{ra`vd+LVfF9$-_?N!f$vXKNB!61|1J0Z9sGMS`s=$t!Tcxq z_tNus`0oULs91m6?W2Ff|3m%y`<;F#zyqQGw8tcWq4(c`zeo0gAAcGF>HoY literal 0 HcmV?d00001