From 84811da73d9eed2b8ffff622c09539ce6e2b3fbf Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Tue, 27 Jul 2010 05:19:15 +0000 Subject: [PATCH] insert the content of fldSimple fields into the XWPFWordTextExtractor output, see bug #49455 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@979538 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../poi/xwpf/usermodel/XWPFParagraph.java | 5 +++++ .../xwpf/extractor/TestXWPFWordExtractor.java | 12 ++++++++++++ test-data/document/FldSimple.docx | Bin 0 -> 12837 bytes 4 files changed, 18 insertions(+) create mode 100755 test-data/document/FldSimple.docx diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 043a23cd0..e2a5d1dae 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 49455 - Insert the content of fldSimple fields into the XWPFWordTextExtractor output 49640 - Fixed parsing formulas containing defined names beginning with an underscore 49538 - Added implementation for POISSON() 49524 - Support for setting cell text to be vertically rotated, via style.setRotation(0xff) diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java index fed5b097d..57e1a6467 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -20,6 +20,7 @@ import java.math.BigInteger; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Arrays; import org.apache.poi.util.Internal; import org.apache.xmlbeans.XmlCursor; @@ -41,6 +42,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; @@ -119,6 +121,9 @@ public class XWPFParagraph implements IBodyElement{ for (CTRunTrackChange c : paragraph.getInsList()) { rs.addAll( c.getRList() ); } + for (CTSimpleField f : paragraph.getFldSimpleList()) { + rs.addAll( f.getRList() ); + } // Get text of the paragraph for (int j = 0; j < rs.size(); j++) { diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index 2277ecec9..5b47b6d0b 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -250,4 +250,16 @@ public class TestXWPFWordExtractor extends TestCase { assertFalse(text.contains("AUTHOR")); assertFalse(text.contains("CREATEDATE")); } + + /** + * The output should contain the values of simple fields, those specified + * with the fldSimple element (spec sec. 17.16.19) + */ + public void testFldSimpleContent() { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("FldSimple.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + String text = extractor.getText(); + assertTrue(text.length() > 0); + assertTrue(text.contains("FldSimple.docx")); + } } diff --git a/test-data/document/FldSimple.docx b/test-data/document/FldSimple.docx new file mode 100755 index 0000000000000000000000000000000000000000..029016ce07d8058f146efb40a25c67f04e04c2ad GIT binary patch literal 12837 zcmeHtWmp{9)^6kO?oM!bcXxMp53a%8B@mn-!QBb&794^*1eXxpZfEAqH#0f&-FyF^ zQ~h+;?x%Xay=&K6d%a7PWWm7E0gwP_002M&fFTbL3IqWF;vfJ3GypWHuBfAftGR=# zp_-SIxr;u7r@b9^BCi-MgdBEG`2tm^3AWZ1 zaoNrwx-|UG%<}R2oIpml>|>EmM?yEt`QtboYw)y8+B7I1jjhUpqBk8Vnf<46I`it; zp}SOZ?2r*LYlK2Ogkg!kvFIMxQvg{R$A>ynaE~GGQP76&ejX;*B;9K9JYB&Zg<&LS zuyzb`i5NBpaQ8t6=os53bNV>La!-{ql$}HOV=z&uAy=EJF+m)gs z?=wAWln|?-gD*A0k9KQ%n$FVmFKL3UNPRs~GnnpX9%3vtCfKhN@YAff8oa8=*{U|vRQa*|F2-;=CRlPLmg^7G+ijinTBb{mo+t7Qobg?sw)1EPt zYj=}+?0d~WT?;BHGsWeG5}>lu5L=00k8ALmI72EsO%-0){=&K9dItjlyuN|~l>TP9 zgo(J#S3q@C02(G7&~go(&Fx$m8Ge5L*JA&J&Gav~9+R{p111a~b`G&&JnYB00!uox zv@Z40gY+0IB>e=$197}?xwE5yxJ_xrVE@OrF~x<<;LJJRVm`=fW9|98#Y`i~4O>t9 z=4dD-?P1-FAWHT}}0gS9wc9E-#lhB9_sVlbhxlOA-A zwV}SI!?k0Yq{vP*set9!w9cQu7JD^}jtixO`3Ms^dFAVt1*T(wF4t(8eGq4B>?nc|866`ylLE|z$>j_002G!8pOlV*^KdbLostSb+ZRLx}P5I52FDA zx;fyl|Fe&ZWO>C9rr)>una}7N`msxOSnNyXRhO&us#YPXu7gM~9T@enqxnz_@)jAL zle5yFb-L!&e}EFk)&~{6?Io=l-5bOV;9m?uz6>LsC* z4_20{rP7j~(|a3N7NTQvf?H!O$!jPJTg2!|HlIuvz?ft<;wKnv9z1T947bmDPaWN4 zk2dUY$y*s4WLunLTf-K#2`#o|fz49H7y!QFvAsER*V4NkAYz~5alke(I{%dXBHV#( z5JT38v03o(yM(RM+|cW4#&;xMSL;gn`Wq)=XZGnF_-45wl>we|g9%QpN6CiqJN2^LII`vtNh>MzT_tSi=z7G9@f)!1so;?J-}$O-1?>h6vw(V(TBr zuJBOYOU11VM=Q3<`_QjLZ6#c39cXYLbaX9|P12U1%IGz5ie>`UfeZOyR$$)n#&rSN znOa%gs%sACL2Lldt$ci|6U3-Y-ZGJy9-|49;f9u;@T8nk|D5YEU^cRYCY3BE`K~844vBTdaa*3uG{@!!in`4 z7v_AMP!g6n9vkQYRM9SnRvm(?52XcD4`GvPav0Td0 zQYwK2_pE`uxYaP2P6VYUk}TGw<)fpm2+^l{1>d}&vQ#TE{ioG8`w^>GYiRS~H+<4g z$6)m;E#@e6m8{M~uo%Ry)WZnMn`LV-`;Bw!e+*|bXE2>?hzIxwkjj9ZOuIMM?s1fi z4kX~gvqTM2%@(Dg)*7OH$!^b*<@m-Sk$P&79%5fXo~mCVfYdNA_`XLFsH^|JWAe5b zbR$Cn0M=vx0LCBM>}q9hZ_fDJC-cw!@)E`P5?+>n{hbCqJ6lrZXo7j(IeXuj~&5Ob&aG--GG)s631dz zz5qGq_1()8;l_?)O0|IxGPA2tNFkqt=ld~pq{PK)@!0FTb=cgPi zA)jg_38|2n=`dEUJk75zWimBoJ;}2B=Ym>Wy$G(PzZIcD7GRjFfPC3hDf(b7;@b+a(3(OcZO(-B$8NAs z?ILf?hfNzJ%*E~mun3T+g5Wuhu}h^z!WP5VT0;vu^R!&R+l8IZ;t0O)ttK>u7BX7p zqb$(^zF#nCL#iUngyKL22OMkRTbJ<#l~3(0yDEXr2N|9JW6PI z$wcqq7b;f=+pQ;D&W6#n4FT#4A8p+m%o77~_Ij5JWyU1l^&DM2_=E2r-|YF~ZlTfp zpD?dGxw{-E)_FIwu;#Dsn|HOb!XPVKBPf=`+$G_(ZxdJ+)X^jyWJKF3PWXcn;g7ub zD-z`;FQbGk z3qMRVPw`R9^#yAB%Doq3Yz4>DDelbMVaJs%EUeUNxhiYtX-ie6?kNXpmP=K2%dx-8 zsk&Cki!#6?Ko(npmI@1n0`}OsPUk;u8AGwMOtq}wn_Dw$b4{X;AR|d-kP+Pa#?eqp zFX`N;;UB(PMpGKoF1KxTG(KgM9?dLgU`jJTx35d7+|*7LTDf#Kd)vvS9fOEx(9xuF zl@OmC#u@U>><*VA!L8Jb+ybQC;^KYH*h{|&hq}O+1wY(4eGqI`iq7gcQ1=HR(!%9I zmIm3rRhcwEbR)&_s|e>DKMZThfXOFw+zqRR_}tF4rxkK|HaV5t=)HoHSkmzs!{ zv~6n>`Fcs~#8j%wpbU93K!)p#i9pz}TNfv`sdbOzTf@QkH;)pCu5`zTNns%M=U2Jd_Q<1g>q-|%(DO-9vP@H-_m@JvPd zdUnw=&B#}8z5OVHh$n-p(nS^kmSGQoB{G=lwJ)*v9J70YbIyBRxv_S$vWK4ln-f4t z$P=Phqk>)vCKzN(LQ7Fs!8K90h@#WZYNbUxcF)B}UYK23%J^hW2>a1CJ(lG3K;q1E zpzW(Q>j_jw^}cR#J+(Ye{weM&dt{>@PLOl6TAw`YQ!xW(f)?+F+@zJ70J5Ly;D_=h z6;RVw{f-=tQBK#bn2Hex0T-0&IhJq4(N`#an<*>3 zL59Ru<~4Rk>V|drsD~XBh&O>?E5e^YG|<4=j|Q>Z;GFY(3r{UwGc~fER7gH&4dUNh zs^TujoJ2c8H-@t1Z9=vgKDk5<{b+n6XM<|`?l|d{8}@u7T+FfWIG0&^t`u!*DdJ>uAr61=flkg7^3VmF^7toO>Po}G?eE$qPDJ1`lv zE+|U$IhK=Y-IxuF5u6rzm0C6OqRq@fGxqD3Lxbd&GUh3h`!V=h&Pi1pt$^r=@phB$ z2toWMbzkd@nEhSTUE+_dz?nLd!R`PqxIJgFf%;67`_XBa^2xk;?TBn{O(TQd7dCT; zMcPNSSwz+L;zkLc7#!%$x=nenudt=cUjo!bYvrl~lno1aN0eyt@vu>+m?vk)L6l-v zH8Hh8oyt+5aP4QpP{@NoDShTx9|Mj?Wgujrz+rN}VS${*m67!#JsRe{?N7T>@6JTD zp3};w4aK!uibdCu#k{V0(p|>Os|bsW!_4VsXcpR|(Sxi(ju4d==0Wv+!z>MQ#O^oW zc<#Q2b4QcM-3e0+Cf(AWVFFsjs=hdh_Jn3Ekmj zgI1?gRi$~XI{fgweLHYZZT21Va;T);Rd+YJvkm?Y=V%BN?kcf?ZB$bB3ab#b!NS8O zKL$!Q%!&4QdX;N8aLalCb{zH!=vT5$Mr=lA_M_C34&5fvw(O@k+=oK+v$b$b!?PLG z=i<~3*lEaEHEd;gO}exKL7Ef(J!OZ7wkQoy_W@2N#*3j~cf|Tqeo^bK$V;^@$^6)4 zCA*u$_0nFk^r}@>F?T8ImYF=2<^f9chEuz(1M4lgK9^m=BXX2(>)-<+t7ZBf3Ic!m_A#4B{B#f$`QcI7)I zccO+;en6xKb#eQQB1JFDh@Dm>jY#1Y#4gL!cri*F#qI5^>v2}fXT$26(&6{Z0$4vFefH5OO zZhqe6rE6_odQV#C+Dxo+NgDJ3uy%LH8#n8fv#C&YTrMLv@E#*ik5Bh+eGoG20E;e| zDzz{uhYc#~2`yEvW;|YsEhA57!d3*~X@pWPfB9~bz5<_VDOLKEQbr}L(~!OV30=2x zzyTz7oLYn`o{Tv|O(us8_ua}sIz9xAPVYy@2q_P!$b|`NhZ%9Jib4CwV zVvwDF3K4y7f%!HGi#9U#W1gUwmEN6s#(*GnwmB;9Y&%_nBHAIC`f}!jr${wT!)YIS z#FxP|aF==|zsPtK=K}#_dEc=V2rZ(j*xQs`!gJY=uV$+gGDroHM47xK`?#<(9;{Kjm8(8zo1<&RTP7FO{b zOt(6<82LR-zVC>ga$hu(DzSN+JWOxZBTY{oH9tR8=gmCEPa1FXU6Nz4=(Nmo4psG$ zDL#;TYqi{Z7{fto(*b~Bi~C06ydcPF`0?rG%2)4Rey3hnTc4Pu!hI!EgWbHDc!JI)of zmTh_no$SW&t}$xeE^~Z*xA!cX^ezEQTfNf!X$1m!lAONJQHO5upinWEw<;+)<*Uq?JeMLa${7D^~hm=zHk zcSyYb8@Vt?$k4!asP`NC8poK$L;5}FILee2NZ42gtg?zCl%%%%HKnBr3>D!d*U#*U zUUqAd15CDGrwba3QYm`scUU{V19S1@V6jD?S8G0eRyfEu6cl4IE(h=}D<~4EC!Rqi zz#4jM`hNLNEIU86%Rw!GnfpV~&@82B;wGyo2ib#|#E1gj=wd}Y1s1Dwi|z}9O3%%^ zR%OT2=Rw)}s7A$JtTQo`+1KuUG&syNI}L$WHNtjp%$kFbzL-#w&7A6rm<;i8XK6>% zuqO<9srdAaDYcP^^-s;H4o@_mcZK?q=}@#t<27!B!XZKfDbqqFClmX5R{MR`dh!fo ztlB~iAOY%MdWV){f;j?J=wnU#ntlDqIwa2+S?^4?Y%uxSyE^4VKqoyUY_uS>8@d+&$$Y#Gk32o#(th$zx#cmk_F zb{d-9DChkshyBP&I`1;I^kq^<K zwM=DBPrpOC1l&l3=`@His44zpgAJ?A@4acv>n98XE26C~Ydxstuh-MrhIKP~J-O=m zbKf(Pzn&Gro7GJOJ0tuU?aOKpq6ftS^+M5oq)Pmdta(9u@5;c3^vc*`WAUOkAz3B4gPMfYImo60Cy-sv7KXU);ALA;aQqIt>k-%aqcs9-)kE3bL6_nc0ILr$O6LLgg~vdXvqU~jtcxH zO?^lSS=zZ#wam1e^yShBbke_>nKu#Zz+smq$~OTPx29b26U?*HW=2SVY!9nFC~PEI*^k!)oaFpmnJ1Z$nO9BTZ)f{O z+I4T{7x$l#Fg8(~K_U|h@=BExB>w}f&ln86y_sr!*nBazI8QRt|8LmXr(a<`$$p1D zLaz8ZubEJ8t#%aR=TQ*ijwmYYpTH=wWb4pVPM2rlF>ltPF}3k3K56`1-Lc?5R++~V z?wk1ZkZLi#cmThpQjU!fm?gb*FK9Ht|B_vCQ-kDOkbnhQOG1jcDFuq7Ur|MXrzLpp z4LwAVM5H=#p}@>@zT%-818LScuLJ7pv)`dFE0z)Nj;nNsfEQSEc-hy`AJ^n=Z=s*Y z4NO|<(9+(Fy=QMBE&d-82|Ze4}I}=BiUzKoTAKu!@D)Y(e~S;7>nYKbRL-Xt$G3Kb#BSH zcLQdPhs1glGYr>W(A2sR`%ySgP6YSJTTLz5=1GTwtfsJ?UAw{;*@tk}=`kyY2zNZJ z8w&AmP!;JqBXr5ek9T53$X|0#SOI%VZiBvR!VO=gQ^IL;(1*V0%Br{_`k|e=NoUko z(H8||A7(r5DISKu{-fVVF!(a;47{=f%!8u<>ny<^cT);-A|}xN^yKT93zW*+l`CCu?3?3Y6;@5UzF9tU zFS#0GP9tV%N+?RP7?Fe0$EbO;lT=4*`^u8W>&Y5mBcM406hGbXiBsCq!~|Pf(s8M6 z7UUKB(VT=-pxDclV%bu)5`l*yZqBqMnL@8c=`e8kLbwhuen<__bKY&hjIl44ic3;) zO>#euk`RK0g-FSpo;6FDV#Vrt|1uc_dEY~aL1?#l$aSV@Ppwm9dfssqp{|Zlrpzkw zLGa`GRdJfdyw>3|EeQE!p_H;Qh?m6VY;6ptKk*QUuPJRfnyxIC_H`sP7big9jH^CWR3f#_cLf^c7RI2USMFxc&` zl%QNgZjZo6;#d$qG!U~Znp*QAc4yZjQLpH0UrKGf_eTRid*w{3_=0EMOY$l2+1J#2 z&VP)}t*ME#g@6Rro(BLx`a41WEi%vC_??wH?!N$8skKyZBJFI0sI`xI*o_sK%Vbm~ zvb_D6B`w5Rb=UkT^I+Vm{)qs}>(sl3<#~_y#2sF@$dc=d>3cG7Wc?qG7H`c^50of8 zWM6OgXQy*J)9?~QNM|DT9^zSUH#(l5n>lYSTfJh;&H@9;1*3zDy4WfCGv&S;(V8Z9 z&6LV*Zb?|yGC8HM$(tlTFBVA##;F?5>4=Z=DOd~*6~8m4EYOt3 zF$2eROLA1tIImn&tynvzlVc?q=7TEU8D5GGm62hcO%D601(7~+J=hLvNm(U{W(%94 z7=NjXe{X=FrN2(WgLVuJdH&9OyDKyi&h0|jp`U@JwPU=he!H%vS1{Mr#QFS1ZZ}Yc zlc~^%Si|V}$}5SMC`T$W^GYu=7^(TDGFUNaRjr~!^W*n7sUKdUIQ*lm4ks-hd9j+H zCeOr4TURz@3kr_QZ4^^G<>DMPZx(P*82l0mDkmyyVcJ?>b>DLO?epmwYpS3KF?Gi* zV#7#-aGS(RjP7sRf!uF1be^BIL}gDyfFl4ZY%m#Y$q-vX)9amK;1&1wWuOGjT39ZO z4$0sg)(s3SWh@g4WnZst$3ik`ampuY;ES>u2Rc^F%0=!cH7J`~;d9UYA45hAwcvj+ zFi$@LvzEl_+2Cclqqj4O|eL(}rem z5#Ds6g&3Se23sr?lz~~L&40ajLVMFE|HQlsPI+Eas|9}!aqut|(Xv`%M!{bq0mX1% zw!`GVmRQ#$P8RL0%a-)1S)5D~I8ez44#0o|(Djvkl#O-FSSR-;XmD6f{^)%!+`>q| z0y%Q~M+J}!#FV!DInS@8XddZO%{lZH*A5<$dp6A+4ORJBoqBYdl(QrH=Mh3~O{%v8 z8Bfd#&7_(&GDkQMOWhy4YRd`FtK~vFN(A-3v-|djj`X0q6GRF*xS@}Ze@;YV$d9=( zY;1Nek6E8UfIoiUwu0-}IFPgg$7_%C<0jGRrdln|bc#D&L09jPRUvB2TGjwa2}wm8 zqi;n;wPLe)_$@x8q)o$}YzySuTEp?>x#Itaai zdG-iB4!vO(Y}*;r$UwR%nkSR5hVVHvIFPc;)NjZI>-b({G`Wc?>=MWyDf>vCs8wTm z!FvB=A{fxws{B2LAo>k~xzgg66MB}NtKp(>>BxG@77DKr`$9|)?x#k0WL?*;vE0!Y z%0YU_l9@f7dJmpST?v2;vNgi9eX;y2uJ=N|m)fW~s9@UeOu_U%J!`xS?e~#+UWu4+ z3;{?hEeNxKu=gU6B@}|oU-``C3q`}QBq5{e$ILPM8pvp`f-1);A~ZoI$}&T)HOEfu zCqpgP$dwjBY!>7}LkY5V=xL8B1KD-f$ZK@UBz$7ZGDhD<>2sFnIa3H-0I4A5PfiH> z+u&cEkg{fRS;@vmy&ZMetQCv5W`oI9GS|;G{gRSi~Bpk(d7`i5olnfmVN6-j*BnluW1V`@d zoem4$j;()5i{=dMMO0F4he3CDwF%N;P=TRy53s3F0ZRFXCc{I(C1>u;ac#^0Kj+=V zmjdL3XhL@OTvY2^IR33a=LujR$P_Y`ET$|enn2My0nBR^d7^230CK`iK(D3&?6xP6 z6Qc3`Iql!6W@fjEt!QbfI&h^HzbgfmZOa2{8@KX2cku7`w&n9`z~<0@u0?i6Uh1~I zQ7xF7>3bDPBOvz!N2c620!m%H#|m)5eB1jc`v1Nbx_Bo7x{Hi5>p493(x68I!OvBY zEKUokE)fJ+H}s`=cV0m%AgRa0ZC#T4I6*&H6RL96C7*eu>e@O94d)!w z!~wQYT4lMA)$osNV^6t>JW-I;F`6%5n;K(Zo^gfQ^d$M~gM);bU! zmN4Q8ym*po&n;?sU#t=K)_s~J>+3fL&%nljusG zMd0|h$cbDaBf@O^f6-&=gW8hoxq`zHHv}h?V&$>-XHUZ0$x(=}1znJ>eYEFq6 za>l`(*$v4kMzrEVao2R%^O0aBP)5|AB(Y+bSHLjN|Kah*y`D>$S}Oq>ipV!x0uWOacUa(zzpe4 z^A>J%Pn-QY9TvdTqB1GhVkXNz>Zn&r_Ka?ELGgmgD}xG2W6@yht`K8{t=j)A#?7uE zVNFTROdx11Y?&!`JBhmWEeoc%1ziNFEz~Dm5!tp+w=H3x^Ji<4__!|OxRGBP-ZsiO zk2RtsFjH5O8N1FPFS3#f?KSbzkin{Wm56+QYw& zER`{ZJ|^ULvP&f8vr5$x z(UA#5Uq+-Hco(YLYYFn*$!r2vb%w@nNKBcu5NfyRc4BY91&vLNFJuLy(Q#1J6RehD zcNvQCu-N_O7@iRsw8uIgy?DOSeR67#xMtDG9&brhVUP7*WK8?a#lnvj*1${0-$#=;GIL$ol zc#Py;>61gVakw=frIZq%3sxfopuFRZ+IXp{bqjdT?pF@J`<%a0< z6Og27Jo(hwG-`6ED>M_@xS$@{VM)z>(8I!$ngIX|Fse5SBPIrpMOHAqWufv zuZ7TG#eXfi{3(tLd~Uzpz%K=uUlD#SK>UfIfc?h={;3@CE6(2&8h@ey020Iiz`vz8 zeii>U%={DJob+$g__+`MD*UV4{8Lzu=3l~pbE3b>|9#*6QyKu!rTfd=|FsJ%$wC0t S3;=)!zF>fw3d#8M+y4O!5{P;L literal 0 HcmV?d00001