fix for bug 45866 - allowed for change of unicode compression across Continue records

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@703620 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-10-11 01:11:05 +00:00
parent b2988b3666
commit 7acf9a90b5
6 changed files with 183 additions and 68 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.2-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45866 - allowed for change of unicode compression across Continue records</action>
<action dev="POI-DEVELOPERS" type="fix">45964 - support for link formulas in Text Objects</action>
<action dev="POI-DEVELOPERS" type="fix">43354 - support for evaluating formulas with missing args</action>
<action dev="POI-DEVELOPERS" type="fix">45912 - fixed ArrayIndexOutOfBoundsException in EmbeddedObjectRefSubRecord</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.2-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45866 - allowed for change of unicode compression across Continue records</action>
<action dev="POI-DEVELOPERS" type="fix">45964 - support for link formulas in Text Objects</action>
<action dev="POI-DEVELOPERS" type="fix">43354 - support for evaluating formulas with missing args</action>
<action dev="POI-DEVELOPERS" type="fix">45912 - fixed ArrayIndexOutOfBoundsException in EmbeddedObjectRefSubRecord</action>

View File

@ -218,54 +218,81 @@ public final class RecordInputStream extends InputStream {
return result;
}
/**
* given a byte array of 16-bit unicode characters, compress to 8-bit and
* return a string
*
* { 0x16, 0x00 } -0x16
*
* @param length the length of the final string
* @return the converted string
* @exception IllegalArgumentException if len is too large (i.e.,
* there is not enough data in string to create a String of that
* length)
*/
public String readUnicodeLEString(int length) {
if ((length < 0) || (((remaining() / 2) < length) && !isContinueNext())) {
throw new IllegalArgumentException("Illegal length - asked for " + length + " but only " + (remaining()/2) + " left!");
}
/**
 * Reads a length-prefixed string: first the character count (via
 * <code>readUShort()</code>), then a one-byte encoding flag, then the characters.
 * A flag value of 0 selects the compressed (one byte per char) encoding; any other
 * value selects the 16-bit encoding.
 *
 * @return the decoded string
 */
public String readString() {
    // the two header fields must be consumed in this order: count, then flag
    final int charCount = readUShort();
    final boolean startsCompressed = (readByte() == 0);
    return readStringCommon(charCount, startsCompressed);
}
/**
* Reads <code>requestedLength</code> characters from the stream, starting in the
* uncompressed (two bytes per character) encoding. The work is delegated to
* <code>readStringCommon</code>, so if the string spills into a following Continue
* record, the encoding of the remainder is taken from that record's leading flag byte.
*
* @param requestedLength the number of characters in the final string
* @return the decoded string
* @exception IllegalArgumentException if <code>requestedLength</code> is negative or
* larger than 0x100000
* @exception RecordFormatException if more characters are needed than the current
* record holds and no Continue record follows
*/
public String readUnicodeLEString(int requestedLength) {
return readStringCommon(requestedLength, false);
}
StringBuffer buf = new StringBuffer(length);
for (int i=0;i<length;i++) {
if ((remaining() == 0) && (isContinueNext())){
nextRecord();
int compressByte = readByte();
if(compressByte != 1) throw new IllegalArgumentException("compressByte in continue records must be 1 while reading unicode LE string");
}
char ch = (char)readShort();
buf.append(ch);
}
return buf.toString();
}
/**
* Reads <code>requestedLength</code> characters from the stream, starting in the
* compressed (one byte per character) encoding. The work is delegated to
* <code>readStringCommon</code>, so if the string spills into a following Continue
* record, the encoding of the remainder is taken from that record's leading flag byte.
*
* @param requestedLength the number of characters in the final string
* @return the decoded string
* @exception IllegalArgumentException if <code>requestedLength</code> is negative or
* larger than 0x100000
* @exception RecordFormatException if more characters are needed than the current
* record holds and no Continue record follows
*/
public String readCompressedUnicode(int requestedLength) {
return readStringCommon(requestedLength, true);
}
/**
* Reads <code>length</code> characters stored one byte each, moving on to the next
* record whenever the current one is exhausted and a continuation is available.
*
* @param length the number of characters to read
* @return the decoded string
* @exception IllegalArgumentException if <code>length</code> is negative, if fewer than
* <code>length</code> bytes remain and no continuation follows, or if the leading flag
* byte of a continuation is not 0
*/
public String readCompressedUnicode(int length) {
// reject a negative length, or one that exceeds the remaining data with no continuation to draw on
if ((length < 0) || ((remaining() < length) && !isContinueNext())) {
throw new IllegalArgumentException("Illegal length " + length);
}
StringBuffer buf = new StringBuffer(length);
for (int i=0;i<length;i++) {
if ((remaining() == 0) && (isContinueNext())) {
// current record is used up: advance and consume the continuation's leading flag byte,
// which this method requires to be 0
nextRecord();
int compressByte = readByte();
if(compressByte != 0) throw new IllegalArgumentException("compressByte in continue records must be 0 while reading compressed unicode");
}
byte b = readByte();
char ch = (char)(0x00FF & b); // mask to the low 8 bits so the byte is not sign-extended
buf.append(ch);
}
return buf.toString();
}
/**
 * Shared implementation for the string readers. Reads <code>requestedLength</code>
 * characters, following the string across Continue records as needed. Each Continue
 * record starts with a flag byte that selects the encoding for the characters in that
 * record (0 = one byte per char, anything else = two bytes per char), so the encoding
 * may change part-way through the string.
 *
 * @param requestedLength number of characters in the resulting string
 * @param pIsCompressedEncoding <code>true</code> if the characters in the current
 *        record are stored one byte each, <code>false</code> for two bytes each
 * @return the decoded string
 * @exception IllegalArgumentException if <code>requestedLength</code> is negative or
 *            larger than 0x100000
 * @exception RecordFormatException if the string is incomplete and no Continue record
 *            follows, or if unread bytes are left behind in a record before continuing
 */
private String readStringCommon(int requestedLength, boolean pIsCompressedEncoding) {
    // Sanity check to detect garbage string lengths (0x100000 = 1,048,576 chars)
    if (requestedLength < 0 || requestedLength > 0x100000) {
        throw new IllegalArgumentException("Bad requested string length (" + requestedLength + ")");
    }
    char[] chars = new char[requestedLength];
    boolean compressed = pIsCompressedEncoding;
    int charsRead = 0;
    for (;;) {
        int charsInRecord = compressed ? remaining() : remaining() / LittleEndian.SHORT_SIZE;
        int charsStillNeeded = requestedLength - charsRead;

        // either the rest of the string fits in this record, or we drain the record
        boolean fitsInRecord = charsStillNeeded <= charsInRecord;
        int chunkSize = fitsInRecord ? charsStillNeeded : charsInRecord;
        for (int i = 0; i < chunkSize; i++) {
            chars[charsRead++] = compressed ? (char) readUByte() : (char) readShort();
        }
        if (fitsInRecord) {
            return new String(chars);
        }

        // the string has spilled over: it must carry on in a Continue record
        if (!isContinueNext()) {
            throw new RecordFormatException("Expected to find a ContinueRecord in order to read remaining "
                    + (requestedLength - charsRead) + " of " + requestedLength + " chars");
        }
        // in 16-bit mode a single trailing byte cannot form a character
        if (remaining() != 0) {
            throw new RecordFormatException("Odd number of bytes(" + remaining() + ") left behind");
        }
        nextRecord();
        // note - the compressed flag may change on the fly
        compressed = (readByte() == 0);
    }
}
/** Returns an excel style unicode string from the bytes remaining in the record.
* <i>Note:</i> Unicode strings differ from <b>normal</b> strings due to the addition of

View File

@ -84,6 +84,7 @@ public final class AllRecordTests {
result.addTestSuite(TestPaneRecord.class);
result.addTestSuite(TestPlotAreaRecord.class);
result.addTestSuite(TestPlotGrowthRecord.class);
result.addTestSuite(TestRecordInputStream.class);
result.addTestSuite(TestRecordFactory.class);
result.addTestSuite(TestSCLRecord.class);
result.addTestSuite(TestSSTDeserializer.class);

View File

@ -0,0 +1,97 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import org.apache.poi.util.HexRead;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
/**
 * Tests for {@link RecordInputStream}, focusing on strings whose encoding
 * (one byte per char vs. two bytes per char) changes at a Continue record boundary.
 *
 * @author Josh Micich
 */
public final class TestRecordInputStream extends TestCase {

    /**
     * Data inspired by attachment 22626 of bug 45866<br/>
     * A unicode string of 18 chars: 3 uncompressed chars in the first record, then a
     * Continue record whose flag byte switches to compressed for the remaining 15.
     */
    private static final String HEX_DUMP1 = ""
            + "1A 59 00 8A 9E 8A " // 3 uncompressed unicode chars
            + "3C 00 " // Continue sid
            + "10 00 " // rec size 16 (1+15)
            + "00" // next chunk is compressed
            + "20 2D 20 4D 75 6C 74 69 6C 69 6E 67 75 61 6C " // 15 chars
            ;

    /**
     * The same text re-arranged: 15 compressed chars in the first record, then a
     * Continue record whose flag byte switches to uncompressed for the last 3.
     */
    private static final String HEX_DUMP2 = ""
            // 15 chars at end of current record
            + "4D 75 6C 74 69 6C 69 6E 67 75 61 6C 20 2D 20"
            + "3C 00 " // Continue sid
            + "07 00 " // rec size 7 (1+6)
            + "01" // this bit uncompressed
            + "1A 59 00 8A 9E 8A " // 3 uncompressed unicode chars
            ;

    /** Parses a hex dump and wraps the resulting bytes in a stream under test. */
    private static RecordInputStream openStream(String hexDump) {
        byte[] rawData = HexRead.readFromString(hexDump);
        return TestcaseRecordInputStream.create(rawData);
    }

    /**
     * Starts reading in uncompressed mode and switches to compressed in the Continue
     * record. (The number in the method name looks like a typo for bug 45866.)
     */
    public void testChangeOfCompressionFlag_bug25866() {
        RecordInputStream in = openStream(""
                + "AA AA " // fake SID
                + "06 00 " // first rec len 6
                + HEX_DUMP1
        );
        String result;
        try {
            result = in.readUnicodeLEString(18);
        } catch (IllegalArgumentException e) {
            // only the specific pre-fix failure is reported as the known bug
            boolean isKnownBug = "compressByte in continue records must be 1 while reading unicode LE string"
                    .equals(e.getMessage());
            if (!isKnownBug) {
                throw e;
            }
            throw new AssertionFailedError("Identified bug 45866");
        }
        assertEquals("\u591A\u8A00\u8A9E - Multilingual", result);
    }

    /**
     * Starts reading in compressed mode; the Continue record then flags its chars as
     * uncompressed (i.e. the opposite direction to what the method name suggests).
     */
    public void testChangeFromUnCompressedToCompressed() {
        RecordInputStream in = openStream(""
                + "AA AA " // fake SID
                + "0F 00 " // first rec len 15
                + HEX_DUMP2
        );
        String result = in.readCompressedUnicode(18);
        assertEquals("Multilingual - \u591A\u8A00\u8A9E", result);
    }

    /**
     * Reads the same data through readString(), which takes the char count and the
     * initial encoding flag from the stream itself.
     */
    public void testReadString() {
        RecordInputStream in = openStream(""
                + "AA AA " // fake SID
                + "12 00 " // first rec len 18 (15 + next 3 bytes)
                + "12 00 " // total chars 18
                + "00 " // this bit compressed
                + HEX_DUMP2
        );
        String result = in.readString();
        assertEquals("Multilingual - \u591A\u8A00\u8A9E", result);
    }
}

View File

@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
@ -19,6 +18,8 @@
package org.apache.poi.hssf.record;
import org.apache.poi.util.HexRead;
import junit.framework.TestCase;
/**
@ -26,18 +27,10 @@ import junit.framework.TestCase;
*
* @author Jason Height (jheight at apache.org)
*/
public class TestUnicodeString
extends TestCase
{
public final class TestUnicodeString extends TestCase {
public TestUnicodeString( String s )
{
super( s );
}
public void testSmallStringSize()
throws Exception
{
public void testSmallStringSize() {
//Test a basic string
UnicodeString s = makeUnicodeString("Test");
UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats();
@ -80,9 +73,7 @@ public class TestUnicodeString
assertEquals(30, stats.recordSize);
}
public void testPerfectStringSize()
throws Exception
{
public void testPerfectStringSize() {
//Test a basic string
UnicodeString s = makeUnicodeString(SSTRecord.MAX_RECORD_SIZE-2-1);
UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats();
@ -99,9 +90,7 @@ public class TestUnicodeString
assertEquals(SSTRecord.MAX_RECORD_SIZE-1, stats.recordSize);
}
public void testPerfectRichStringSize()
throws Exception
{
public void testPerfectRichStringSize() {
//Test a rich text string
UnicodeString s = makeUnicodeString(SSTRecord.MAX_RECORD_SIZE-2-1-8-2);
s.addFormatRun(new UnicodeString.FormatRun((short)1,(short)0));
@ -123,7 +112,7 @@ public class TestUnicodeString
assertEquals(SSTRecord.MAX_RECORD_SIZE-1, stats.recordSize);
}
public void testContinuedStringSize() throws Exception {
public void testContinuedStringSize() {
//Test a basic string
UnicodeString s = makeUnicodeString(SSTRecord.MAX_RECORD_SIZE-2-1+20);
UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats();
@ -132,7 +121,7 @@ public class TestUnicodeString
}
/** Tests that a string size calculation that fits neatly in two records, the second being a continue*/
public void testPerfectContinuedStringSize() throws Exception {
public void testPerfectContinuedStringSize() {
//Test a basic string
int strSize = SSTRecord.MAX_RECORD_SIZE*2;
//String overhead
@ -150,19 +139,18 @@ public class TestUnicodeString
private UnicodeString makeUnicodeString( String s )
private static UnicodeString makeUnicodeString( String s )
{
UnicodeString st = new UnicodeString(s);
st.setOptionFlags((byte)0);
return st;
}
private UnicodeString makeUnicodeString( int numChars) {
private static UnicodeString makeUnicodeString( int numChars) {
StringBuffer b = new StringBuffer(numChars);
for (int i=0;i<numChars;i++) {
b.append(i%10);
}
return makeUnicodeString(b.toString());
}
}