From 5acc99f2a1788950ed643821c8d20becdeed8918 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sat, 28 Jun 2008 18:54:02 +0000 Subject: [PATCH] Patch from N. Hira from bug #45001 - Further fix for HWPF Range.delete() and unicode characters git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@672569 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../org/apache/poi/hwpf/model/TextPiece.java | 16 +- .../apache/poi/hwpf/data/testRangeDelete.doc | Bin 0 -> 104448 bytes .../poi/hwpf/usermodel/TestRangeDelete.java | 196 ++++++++++++++++++ 5 files changed, 210 insertions(+), 4 deletions(-) create mode 100644 src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc create mode 100644 src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 825c99cc5..e353ecb43 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45001 - Further fix for HWPF Range.delete() and unicode characters 45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records. Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index cafa18cc5..7e2d95156 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45001 - Further fix for HWPF Range.delete() and unicode characters 45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records. Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java index bc33954df..227200ab5 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java @@ -91,15 +91,18 @@ public class TextPiece extends PropertyNode implements Comparable public void adjustForDelete(int start, int length) { + // length is expected to be the number of code-points, + // not the number of characters + int numChars = length; if (usesUnicode()) { start /= 2; - length /= 2; + numChars = (length / 2); } int myStart = getStart(); int myEnd = getEnd(); - int end = start + length; + int end = start + numChars; /* do we have to delete from this text piece? */ if (start <= myEnd && end >= myStart) { @@ -108,9 +111,14 @@ public class TextPiece extends PropertyNode implements Comparable int overlapStart = Math.max(myStart, start); int overlapEnd = Math.min(myEnd, end); ((StringBuffer)_buf).delete(overlapStart, overlapEnd); - - super.adjustForDelete(start, length); } + + // We need to invoke this even if text from this piece is not being + // deleted because the adjustment must propagate to all subsequent + // text pieces i.e., if text from tp[n] is being deleted, then + // tp[n + 1], tp[n + 2], etc. will need to be adjusted. + // The superclass is expected to use a separate sentry for this. + super.adjustForDelete(start, length); } public int characterLength() diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc new file mode 100644 index 0000000000000000000000000000000000000000..896108397cc74c26f901312de84ed809c4e205f2 GIT binary patch literal 104448 zcmeI0Yj9mv7035UlQgMq32msQ1i6$*Uo`Xyt$>tJN*WApX?ZBm+a@KY%?;e7(1K_c z@C|~fD58KCe4tn!iVu{6g3%e3ao_`^AJFlG%;=0Vjx&5{<8PgP@9ECHdEBIFHS~Wv z|8v&fd+oK?{+)gIKIzN9Kl=5DpP2DyS8$!-#<~5y)vhwAFT?(IzFgy+l_Rvjx3@Q6 zy$ge)t}#MjUoKC3hvmGx3S{s67pMwVqY3C8Xd;?~CZnSeOTZnCrl6^)79E48p?9L` z=vXuZ9f#`B@#tOX1e8K2qM7I*7os(2En0^z zLhI26bTPUFU5YM4??)d%m!m7tmFOyTHM$0+(MGfhwW2n(8C{Fo(H3+a+KRTJ4wONi z=z4Sm%AzjRjkcp3(M@Ov+KFyPx1bNAThWKmhtWsSMMUbPXNs9;cxK}sG?aKvW5t9^ zy_9a@eo>15P(B*}0D+-cS$oJo%9RU$|Kq72?ccwRG1h zvbv0!Ikz{yyH&P-w`|#z?dH^nD`*$oup z_J(Ed-^-u0&1r~>_2&m2!f&AFDV#bJ_zJn46Dc2veYWOby=C!!JDy#=jU6@3&bTe! zA#GJJtao$kqs6Xqu3N+Iz7Cz+Jl%CPRhM_63z{Ogwt1Y(H1)cxc&2H)8oL|yb2Yki zR=nB!aD0v&%Zaj)-8juyH*;F8=b2__ZRMWj-o;Mej1MROnf3tVwO{P|I^j(-r*|iBTm>g&1IxtM*4N6T#-w+jx(~c#^TsnSB)_9Mfz+0QTn^T)ub~c|{6Gc;^I_z>zx-DdC=Sf+7!k-yuH8VPsn(>^K zY0jQ*d^+Q^XcAulQ;A}cCX&_U80KkJk#W1SDw-MhUc}SBFILAiEFUXPjeB5nJ;P!s z_n#T@di1r!yb~b9=EWnvnQh-Rc3FJ8%Iw3%F00@Wk9{jCpFi#HoJ-t1;B5m_?*M6+ zOYt$##$lU^57G|6*x|gCIQhjmU=SMC^CCLpS7Jx|;*(1o{hWCDq~p(lrIn@i*+6FP zye`X-j~7PT#dD~uk$HB<3n3dX#17_p309UUDYCcO+K0y(C1%k=pA?@^ww&!}U>`A! z;E$^AWUqMjy@4`7~EQ z@xVy={pCPSU#qwGsCcijg=)6#OL@Dmr~lS>t|zyq^Up%BuxviHVZ{LUpYU5kTMh4G zBB#IQQ2bhK;MebX@Xfc@wAVa#PnDZD>#5fa!S3#e6Ts)af~Wm`Xn!a12@(JOV}B=J zgA9vpYWDY{?es^Z+mVg*#yXNWe1>Yh;Un2#COl2^8<% zzQ%7g1vaB3H3bey(fcHxiIF(?2>rT+m5H4gVaGx2#J|`Gd-+yh6W){husinu_9DYD zfv_0b1Nl%tklWx*VZg*5Zm44a;+j%|Cq9S57$l{Eh8sbbfkF*Ze|Sb2B!Lk(k$ISW zQuIdB4h1z~YhwFJB}I(TE^hTP3w*-jKz4D*#M3wVu}16egAXg( z2)ZQk52kMt0)t}Wk8wyZKURrDBzczT@7;zqL>RfCV?i z5+iK&HckF2Zc>d1h`|3j0e-wbLvJL-QBOOG9~#)nm5dnHU{mjFk~|ZK#PB}--jnW$ za}o_BYz^(OiTZ*j-`a0wLC1bQ<9Pj_O1|mP-IIKL^5H;9Q(}azCE5qmx5Od5+mHj{ zFr?40MwGJ%h=2%)z!62jA0qK#lDjeefr?2}VtCIZirB$V7HGu)2V$Po7k4@MOcX%` zL_h>YKmYKmYKmY zKmYKmYKmYKmYKmYKmYKmYKmYKmYKmYKmYKmYKmY zKmYKmYKm3uZXUes>^=`hqlzv%yq}+0MI`_?Sxx=ODZT+pKmAXxg(?QwB zO;O5Xn~(R{PV_CbT}#`A)OETwMA}TM~Zb#fRB$aYE#iMPF zNwqPeWn>Qc3-<5M=ji`_hCzrZbI z{&Ux6^Hm zGu}yE7g=?Z-M09hZjAeSCKoBz!`^J~pkR)4F_JZ;sa ziRUBwSZjH+tEu( zc=O(h*Uos>#(Vg)IgvFs>2UqDhciyOb2HE$^fG!Al~p)bg?iBMP&|VS<~o|H%e&BNRngL_=z^xmt!*xMnWkQMRr5HP z#^Y-2Zggc;bVXIPhSqgxG5zy)K8E$SnEmy(sLPAT9Ad!y2sT=n?O@&Vef(&tWQNJ~ z#svwclIaC=V@dM9?fu*P4&U#8!WZ?Au<+Ng@W-(5hp-@f^axZrH>I^birm!Q&eiU| zgx57nH5D`6342~XHMQsUvrgQTDW8c>x;s-|j~1_WZuTNKdBLFb*x&11i zf^S#*9<_7dzH;n)IQ(+;U&G$Fzx!;)XX|tpuGH~4KHV$bwpv{B)x45b5PliQjV0V> rj9~tC#I4W$$x-17M;;!JWg{5Bh4H&fm|LkX@r%#hO6jvdSSSAlUqoMr literal 0 HcmV?d00001 diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java new file mode 100644 index 000000000..1becc234c --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java @@ -0,0 +1,196 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hwpf.usermodel; + +import java.io.ByteArrayOutputStream; +import java.io.FileInputStream; +import java.util.List; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.model.PicturesTable; +import org.apache.poi.hwpf.usermodel.Picture; + +import junit.framework.TestCase; + +/** + * Test to see if Range.delete() works even if the Range contains a + * CharacterRun that uses Unicode characters. + */ +public class TestRangeDelete extends TestCase { + + // u201c and u201d are "smart-quotes" + private String originalText = + "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r"; + private String searchText = "${delete}"; + private String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r"; + private String expectedText2 = + "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} and all the POI contributors for their assistance in this matter.\r"; + private String expectedText3 = "Thank you, ${organization} !\r"; + + private String illustrativeDocFile; + + protected void setUp() throws Exception { + + String dirname = System.getProperty("HWPF.testdata.path"); + + illustrativeDocFile = dirname + "/testRangeDelete.doc"; + } + + /** + * Test just opening the files + */ + public void testOpen() throws Exception { + + HWPFDocument docA = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + } + + /** + * Test (more "confirm" than test) that we have the general structure that we expect to have. + */ + public void testDocStructure() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + Section section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + Paragraph para = section.getParagraph(2); + + assertEquals(5, para.numCharacterRuns()); + + assertEquals(originalText, para.text()); + } + + /** + * Test that we can delete text (one instance) from our Range with Unicode text. + */ + public void testRangeDeleteOne() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + assertEquals(1, range.numSections()); + + Section section = range.getSection(0); + assertEquals(5, section.numParagraphs()); + + Paragraph para = section.getParagraph(2); + + String text = para.text(); + assertEquals(originalText, text); + + int offset = text.indexOf(searchText); + assertEquals(192, offset); + + int absOffset = para.getStartOffset() + offset; + if (para.usesUnicode()) + absOffset = para.getStartOffset() + (offset * 2); + + Range subRange = new Range(absOffset, (absOffset + searchText.length()), para.getDocument()); + if (subRange.usesUnicode()) + subRange = new Range(absOffset, (absOffset + (searchText.length() * 2)), para.getDocument()); + + assertEquals(searchText, subRange.text()); + + subRange.delete(); + + // we need to let the model re-calculate the Range before we evaluate it + range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + para = section.getParagraph(2); + + text = para.text(); + assertEquals(expectedText2, text); + + // this can lead to a StringBufferOutOfBoundsException, so we will add it + // even though we don't have an assertion for it + Range daRange = daDoc.getRange(); + daRange.text(); + } + + /** + * Test that we can delete text (all instances of) from our Range with Unicode text. + */ + public void testRangeDeleteAll() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + assertEquals(1, range.numSections()); + + Section section = range.getSection(0); + assertEquals(5, section.numParagraphs()); + + Paragraph para = section.getParagraph(2); + + String text = para.text(); + assertEquals(originalText, text); + + boolean keepLooking = true; + while (keepLooking) { + + int offset = range.text().indexOf(searchText); + if (offset >= 0) { + + int absOffset = range.getStartOffset() + offset; + if (range.usesUnicode()) + absOffset = range.getStartOffset() + (offset * 2); + + Range subRange = new Range( + absOffset, (absOffset + searchText.length()), range.getDocument()); + if (subRange.usesUnicode()) + subRange = new Range( + absOffset, (absOffset + (searchText.length() * 2)), range.getDocument()); + + assertEquals(searchText, subRange.text()); + + subRange.delete(); + + } else + keepLooking = false; + } + + // we need to let the model re-calculate the Range before we use it + range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + + para = section.getParagraph(1); + text = para.text(); + assertEquals(expectedText1, text); + + para = section.getParagraph(2); + text = para.text(); + assertEquals(expectedText2, text); + + para = section.getParagraph(3); + text = para.text(); + assertEquals(expectedText3, text); + } +}