Patch from N. Hira from bug #45001 - Further fix for HWPF Range.delete() and unicode characters

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@672569 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-06-28 18:54:02 +00:00
parent 1f27ceb98f
commit 5acc99f2a1
5 changed files with 210 additions and 4 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??"> <release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45001 - Further fix for HWPF Range.delete() and unicode characters</action>
<action dev="POI-DEVELOPERS" type="add">45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation</action> <action dev="POI-DEVELOPERS" type="add">45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation</action>
<action dev="POI-DEVELOPERS" type="fix">Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records.</action> <action dev="POI-DEVELOPERS" type="fix">Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records.</action>
<action dev="POI-DEVELOPERS" type="add">Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder</action> <action dev="POI-DEVELOPERS" type="add">Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.1.1-alpha1" date="2008-??-??"> <release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45001 - Further fix for HWPF Range.delete() and unicode characters</action>
<action dev="POI-DEVELOPERS" type="add">45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation</action> <action dev="POI-DEVELOPERS" type="add">45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation</action>
<action dev="POI-DEVELOPERS" type="fix">Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records.</action> <action dev="POI-DEVELOPERS" type="fix">Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records.</action>
<action dev="POI-DEVELOPERS" type="add">Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder</action> <action dev="POI-DEVELOPERS" type="add">Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder</action>

View File

@ -91,15 +91,18 @@ public class TextPiece extends PropertyNode implements Comparable
public void adjustForDelete(int start, int length) public void adjustForDelete(int start, int length)
{ {
// length is expected to be the number of code-points,
// not the number of characters
int numChars = length;
if (usesUnicode()) { if (usesUnicode()) {
start /= 2; start /= 2;
length /= 2; numChars = (length / 2);
} }
int myStart = getStart(); int myStart = getStart();
int myEnd = getEnd(); int myEnd = getEnd();
int end = start + length; int end = start + numChars;
/* do we have to delete from this text piece? */ /* do we have to delete from this text piece? */
if (start <= myEnd && end >= myStart) { if (start <= myEnd && end >= myStart) {
@ -108,9 +111,14 @@ public class TextPiece extends PropertyNode implements Comparable
int overlapStart = Math.max(myStart, start); int overlapStart = Math.max(myStart, start);
int overlapEnd = Math.min(myEnd, end); int overlapEnd = Math.min(myEnd, end);
((StringBuffer)_buf).delete(overlapStart, overlapEnd); ((StringBuffer)_buf).delete(overlapStart, overlapEnd);
super.adjustForDelete(start, length);
} }
// We need to invoke this even if text from this piece is not being
// deleted because the adjustment must propagate to all subsequent
// text pieces i.e., if text from tp[n] is being deleted, then
// tp[n + 1], tp[n + 2], etc. will need to be adjusted.
// The superclass is expected to use a separate sentry for this.
super.adjustForDelete(start, length);
} }
public int characterLength() public int characterLength()

View File

@ -0,0 +1,196 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf.usermodel;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import junit.framework.TestCase;
/**
* Test to see if Range.delete() works even if the Range contains a
* CharacterRun that uses Unicode characters.
*/
public class TestRangeDelete extends TestCase {

    /**
     * Text of the third paragraph (index 2) of the fixture before any
     * deletion. u201c and u201d are "smart-quotes".
     */
    private static final String ORIGINAL_TEXT =
        "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r";

    /** The placeholder that the tests locate and delete. */
    private static final String SEARCH_TEXT = "${delete}";

    /** Expected text of paragraph 1 once every placeholder has been deleted. */
    private static final String EXPECTED_TEXT_1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";

    /**
     * Expected text of paragraph 2 once the placeholder has been deleted.
     * NOTE(review): only the placeholder itself is deleted, so the whitespace
     * that surrounded it remains - confirm the spacing of this literal
     * against the fixture document.
     */
    private static final String EXPECTED_TEXT_2 =
        "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} and all the POI contributors for their assistance in this matter.\r";

    /** Expected text of paragraph 3 once every placeholder has been deleted. */
    private static final String EXPECTED_TEXT_3 = "Thank you, ${organization} !\r";

    /** Path of the fixture document, resolved in {@link #setUp()}. */
    private String illustrativeDocFile;

    /**
     * Resolves the fixture path from the <code>HWPF.testdata.path</code>
     * system property.
     */
    protected void setUp() throws Exception {
        String dirname = System.getProperty("HWPF.testdata.path");
        // Fail with a clear message instead of trying to open "null/testRangeDelete.doc".
        assertNotNull("System property HWPF.testdata.path must be set", dirname);
        illustrativeDocFile = dirname + "/testRangeDelete.doc";
    }

    /**
     * Opens the fixture document and always releases the underlying file
     * handle, whether or not the document could be parsed.
     *
     * @return the parsed document
     * @throws Exception if the file cannot be opened or parsed
     */
    private HWPFDocument openIllustrativeDoc() throws Exception {
        FileInputStream in = new FileInputStream(illustrativeDocFile);
        try {
            return new HWPFDocument(in);
        } finally {
            in.close();
        }
    }

    /**
     * Builds the Range covering the occurrence of the search text that starts
     * <code>offset</code> characters into <code>container</code>'s text.
     * Offsets and lengths are doubled when the respective range reports that
     * it uses Unicode.
     *
     * @param container the range whose text was searched
     * @param offset    character index of the match within container.text()
     * @return a Range spanning exactly the matched search text
     */
    private Range subRangeForMatch(Range container, int offset) {
        int start = container.getStartOffset();
        int absOffset = container.usesUnicode() ? start + (offset * 2) : start + offset;

        Range subRange = new Range(
            absOffset, absOffset + SEARCH_TEXT.length(), container.getDocument());
        // The width is decided by the sub-range itself, which may differ from
        // its container, so probe it first and widen only if needed.
        if (subRange.usesUnicode()) {
            subRange = new Range(
                absOffset, absOffset + (SEARCH_TEXT.length() * 2), container.getDocument());
        }
        return subRange;
    }

    /**
     * Test just opening the files
     */
    public void testOpen() throws Exception {
        openIllustrativeDoc();
    }

    /**
     * Test (more "confirm" than test) that we have the general structure that we expect to have.
     */
    public void testDocStructure() throws Exception {
        HWPFDocument daDoc = openIllustrativeDoc();

        Range range = daDoc.getRange();
        assertEquals(1, range.numSections());

        Section section = range.getSection(0);
        assertEquals(5, section.numParagraphs());

        Paragraph para = section.getParagraph(2);
        assertEquals(5, para.numCharacterRuns());
        assertEquals(ORIGINAL_TEXT, para.text());
    }

    /**
     * Test that we can delete text (one instance) from our Range with Unicode text.
     */
    public void testRangeDeleteOne() throws Exception {
        HWPFDocument daDoc = openIllustrativeDoc();

        Range range = daDoc.getRange();
        assertEquals(1, range.numSections());

        Section section = range.getSection(0);
        assertEquals(5, section.numParagraphs());

        Paragraph para = section.getParagraph(2);
        String text = para.text();
        assertEquals(ORIGINAL_TEXT, text);

        int offset = text.indexOf(SEARCH_TEXT);
        assertEquals(192, offset);

        Range subRange = subRangeForMatch(para, offset);
        assertEquals(SEARCH_TEXT, subRange.text());

        subRange.delete();

        // we need to let the model re-calculate the Range before we evaluate it
        range = daDoc.getRange();
        assertEquals(1, range.numSections());

        section = range.getSection(0);
        assertEquals(5, section.numParagraphs());

        para = section.getParagraph(2);
        assertEquals(EXPECTED_TEXT_2, para.text());

        // this can lead to a StringBufferOutOfBoundsException, so we will add it
        // even though we don't have an assertion for it
        daDoc.getRange().text();
    }

    /**
     * Test that we can delete text (all instances of) from our Range with Unicode text.
     */
    public void testRangeDeleteAll() throws Exception {
        HWPFDocument daDoc = openIllustrativeDoc();

        Range range = daDoc.getRange();
        assertEquals(1, range.numSections());

        Section section = range.getSection(0);
        assertEquals(5, section.numParagraphs());

        Paragraph para = section.getParagraph(2);
        assertEquals(ORIGINAL_TEXT, para.text());

        // Keep deleting until the search text no longer occurs in the range.
        while (true) {
            int offset = range.text().indexOf(SEARCH_TEXT);
            if (offset < 0) {
                break;
            }

            Range subRange = subRangeForMatch(range, offset);
            assertEquals(SEARCH_TEXT, subRange.text());

            subRange.delete();
        }

        // we need to let the model re-calculate the Range before we use it
        range = daDoc.getRange();
        assertEquals(1, range.numSections());

        section = range.getSection(0);
        assertEquals(5, section.numParagraphs());

        assertEquals(EXPECTED_TEXT_1, section.getParagraph(1).text());
        assertEquals(EXPECTED_TEXT_2, section.getParagraph(2).text());
        assertEquals(EXPECTED_TEXT_3, section.getParagraph(3).text());
    }
}