Patch from N. Hira from bug #45001 - Further fix for HWPF Range.delete() and unicode characters

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@672569 13f79535-47bb-0310-9956-ffa450edef68
2008-06-28 18:54:02 +00:00 · 2008-06-28 18:54:02 +00:00 · 5acc99f2a1
commit 5acc99f2a1
parent 1f27ceb98f
5 changed files with 210 additions and 4 deletions
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@ -37,6 +37,7 @@
 		<!-- Don't forget to update status.xml too! -->
        <release version="3.1.1-alpha1" date="2008-??-??">
           <action dev="POI-DEVELOPERS" type="fix">45001 - Further fix for HWPF Range.delete() and unicode characters</action>
           <action dev="POI-DEVELOPERS" type="add">45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation</action>
           <action dev="POI-DEVELOPERS" type="fix">Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records.</action>
           <action dev="POI-DEVELOPERS" type="add">Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder</action>
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@ -34,6 +34,7 @@
 	<!-- Don't forget to update changes.xml too! -->
    <changes>
        <release version="3.1.1-alpha1" date="2008-??-??">
           <action dev="POI-DEVELOPERS" type="fix">45001 - Further fix for HWPF Range.delete() and unicode characters</action>
           <action dev="POI-DEVELOPERS" type="add">45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation</action>
           <action dev="POI-DEVELOPERS" type="fix">Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records.</action>
           <action dev="POI-DEVELOPERS" type="add">Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder</action>
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java
@ -91,15 +91,18 @@ public class TextPiece extends PropertyNode implements Comparable
   public void adjustForDelete(int start, int length)
   {
 	   // length is expected to be the number of code-points,
 	   // not the number of characters
 	   int numChars = length;
 	   if (usesUnicode()) {
 		   start /= 2;
-		   length /= 2;
+		   numChars = (length / 2);
 	   }
 	   int myStart = getStart();
 	   int myEnd = getEnd();
-	   int end = start + length;
+	   int end = start + numChars;
 	   /* do we have to delete from this text piece? */
 	   if (start <= myEnd && end >= myStart) {
@ -108,9 +111,14 @@ public class TextPiece extends PropertyNode implements Comparable
 		   int overlapStart = Math.max(myStart, start);
 		   int overlapEnd = Math.min(myEnd, end);
 		   ((StringBuffer)_buf).delete(overlapStart, overlapEnd);
 		   super.adjustForDelete(start, length);
 	   }
 	   // We need to invoke this even if text from this piece is not being
 	   // deleted because the adjustment must propagate to all subsequent
 	   // text pieces i.e., if text from tp[n] is being deleted, then
 	   // tp[n + 1], tp[n + 2], etc. will need to be adjusted.
 	   // The superclass is expected to use a separate sentry for this.
 	   super.adjustForDelete(start, length);
   }
   public int characterLength()
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
@ -0,0 +1,196 @@
 /* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at
 	   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
 ==================================================================== */
 package org.apache.poi.hwpf.usermodel;
 import java.io.ByteArrayOutputStream;
 import java.io.FileInputStream;
 import java.util.List;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.model.PicturesTable;
 import org.apache.poi.hwpf.usermodel.Picture;
 import junit.framework.TestCase;
 /**
  *	Test to see if Range.delete() works even if the Range contains a
  *	CharacterRun that uses Unicode characters.
  *	<p>
  *	All tests load the sample document named by the
  *	<code>HWPF.testdata.path</code> system property and inspect the
  *	paragraphs of its single section.
  */
 public class TestRangeDelete extends TestCase {
 	// u201c and u201d are "smart-quotes"
 	private final String originalText =
 		"It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.  Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r";
 	// the placeholder that the tests locate and delete
 	private final String searchText = "${delete}";
 	// expected paragraph texts (paragraphs 1, 2 and 3) once the placeholder is gone
 	private final String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
 	private final String expectedText2 =
 		"It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.  Everybody should be thankful to the ${organization}  and all the POI contributors for their assistance in this matter.\r";
 	private final String expectedText3 = "Thank you, ${organization} !\r";
 	// full path of the sample document; resolved in setUp()
 	private String illustrativeDocFile;
 	protected void setUp() throws Exception {
 		String dirname = System.getProperty("HWPF.testdata.path");
 		illustrativeDocFile = dirname + "/testRangeDelete.doc";
 	}
 	/**
 	 * Loads the sample document and always closes the file stream
 	 * afterwards, so that the tests do not leak file handles.
 	 * NOTE(review): relies on the HWPFDocument constructor consuming the
 	 * stream completely before it returns - confirm against HWPFDocument.
 	 */
 	private HWPFDocument openSampleDoc() throws Exception {
 		FileInputStream in = new FileInputStream(illustrativeDocFile);
 		try {
 			return new HWPFDocument(in);
 		} finally {
 			in.close();
 		}
 	}
 	/**
 	 * Builds the Range covering one occurrence of the search text.
 	 *
 	 * @param container the Range whose text() was searched
 	 * @param offset index of the search text within container.text()
 	 * @return a Range over the occurrence, sized according to whether the
 	 *         located text reports that it uses Unicode
 	 */
 	private Range rangeOfSearchText(Range container, int offset) {
 		int start = container.getStartOffset() + offset;
 		if (container.usesUnicode()) {
 			// the offset is doubled when the container reports Unicode text
 			start = container.getStartOffset() + (offset * 2);
 		}
 		Range hit = new Range(start, (start + searchText.length()), container.getDocument());
 		if (hit.usesUnicode()) {
 			// same doubling for the length of the located text
 			hit = new Range(start, (start + (searchText.length() * 2)), container.getDocument());
 		}
 		return hit;
 	}
 	/**
 	 * Test just opening the file
 	 */
 	public void testOpen() throws Exception {
 		openSampleDoc();
 	}
 	/**
 	 * Test (more "confirm" than test) that we have the general structure that we expect to have.
 	 */
 	public void testDocStructure() throws Exception {
 		HWPFDocument daDoc = openSampleDoc();
 		Range range = daDoc.getRange();
 		assertEquals(1, range.numSections());
 		Section section = range.getSection(0);
 		assertEquals(5, section.numParagraphs());
 		Paragraph para = section.getParagraph(2);
 		assertEquals(5, para.numCharacterRuns());
 		assertEquals(originalText, para.text());
 	}
 	/**
 	 * Test that we can delete text (one instance) from our Range with Unicode text.
 	 */
 	public void testRangeDeleteOne() throws Exception {
 		HWPFDocument daDoc = openSampleDoc();
 		Range range = daDoc.getRange();
 		assertEquals(1, range.numSections());
 		Section section = range.getSection(0);
 		assertEquals(5, section.numParagraphs());
 		Paragraph para = section.getParagraph(2);
 		String text = para.text();
 		assertEquals(originalText, text);
 		int offset = text.indexOf(searchText);
 		assertEquals(192, offset);
 		Range subRange = rangeOfSearchText(para, offset);
 		assertEquals(searchText, subRange.text());
 		subRange.delete();
 		// we need to let the model re-calculate the Range before we evaluate it
 		range = daDoc.getRange();
 		assertEquals(1, range.numSections());
 		section = range.getSection(0);
 		assertEquals(5, section.numParagraphs());
 		para = section.getParagraph(2);
 		text = para.text();
 		assertEquals(expectedText2, text);
 		// this can lead to a StringBufferOutOfBoundsException, so we will add it
 		// even though we don't have an assertion for it
 		Range daRange = daDoc.getRange();
 		daRange.text();
 	}
 	/**
 	 * Test that we can delete text (all instances of) from our Range with Unicode text.
 	 */
 	public void testRangeDeleteAll() throws Exception {
 		HWPFDocument daDoc = openSampleDoc();
 		Range range = daDoc.getRange();
 		assertEquals(1, range.numSections());
 		Section section = range.getSection(0);
 		assertEquals(5, section.numParagraphs());
 		Paragraph para = section.getParagraph(2);
 		String text = para.text();
 		assertEquals(originalText, text);
 		// keep deleting until the search text no longer occurs in the range
 		int offset = range.text().indexOf(searchText);
 		while (offset >= 0) {
 			Range subRange = rangeOfSearchText(range, offset);
 			assertEquals(searchText, subRange.text());
 			subRange.delete();
 			offset = range.text().indexOf(searchText);
 		}
 		// we need to let the model re-calculate the Range before we use it
 		range = daDoc.getRange();
 		assertEquals(1, range.numSections());
 		section = range.getSection(0);
 		assertEquals(5, section.numParagraphs());
 		para = section.getParagraph(1);
 		text = para.text();
 		assertEquals(expectedText1, text);
 		para = section.getParagraph(2);
 		text = para.text();
 		assertEquals(expectedText2, text);
 		para = section.getParagraph(3);
 		text = para.text();
 		assertEquals(expectedText3, text);
 	}
 }