From 7acf9a90b5eeb590793319832da7033fc6ff2f40 Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Sat, 11 Oct 2008 01:11:05 +0000 Subject: [PATCH] fix for bug 45866 - allowed for change of unicode compression across Continue records git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@703620 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../poi/hssf/record/RecordInputStream.java | 119 +++++++++++------- .../poi/hssf/record/AllRecordTests.java | 1 + .../hssf/record/TestRecordInputStream.java | 97 ++++++++++++++ .../poi/hssf/record/TestUnicodeString.java | 32 ++--- 6 files changed, 183 insertions(+), 68 deletions(-) create mode 100644 src/testcases/org/apache/poi/hssf/record/TestRecordInputStream.java diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 2ba7f0c97..28443d427 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45866 - allowed for change of unicode compression across Continue records 45964 - support for link formulas in Text Objects 43354 - support for evalating formulas with missing args 45912 - fixed ArrayIndexOutOfBoundsException in EmbeddedObjectRefSubRecord diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 8f661cdb4..80d6641c8 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45866 - allowed for change of unicode compression across Continue records 45964 - support for link formulas in Text Objects 43354 - support for evalating formulas with missing args 45912 - fixed ArrayIndexOutOfBoundsException in EmbeddedObjectRefSubRecord diff --git a/src/java/org/apache/poi/hssf/record/RecordInputStream.java b/src/java/org/apache/poi/hssf/record/RecordInputStream.java index 02c4f7a05..696f4fef7 100755 --- 
a/src/java/org/apache/poi/hssf/record/RecordInputStream.java +++ b/src/java/org/apache/poi/hssf/record/RecordInputStream.java @@ -218,54 +218,81 @@ public final class RecordInputStream extends InputStream { return result; } - /** - * given a byte array of 16-bit unicode characters, compress to 8-bit and - * return a string - * - * { 0x16, 0x00 } -0x16 - * - * @param length the length of the final string - * @return the converted string - * @exception IllegalArgumentException if len is too large (i.e., - * there is not enough data in string to create a String of that - * length) - */ - public String readUnicodeLEString(int length) { - if ((length < 0) || (((remaining() / 2) < length) && !isContinueNext())) { - throw new IllegalArgumentException("Illegal length - asked for " + length + " but only " + (remaining()/2) + " left!"); - } + public String readString() { + int requestedLength = readUShort(); + byte compressFlag = readByte(); + return readStringCommon(requestedLength, compressFlag == 0); + } + /** + * given a byte array of 16-bit unicode characters, compress to 8-bit and + * return a string + * + * { 0x16, 0x00 } -0x16 + * + * @param requestedLength the length of the final string + * @return the converted string + * @exception IllegalArgumentException if len is too large (i.e., + * there is not enough data in string to create a String of that + * length) + */ + public String readUnicodeLEString(int requestedLength) { + return readStringCommon(requestedLength, false); + } - StringBuffer buf = new StringBuffer(length); - for (int i=0;i 0x100000) { // ~1 million chars (0x100000 = 1,048,576) + throw new IllegalArgumentException("Bad requested string length (" + requestedLength + ")"); + } + char[] buf = new char[requestedLength]; + boolean isCompressedEncoding = pIsCompressedEncoding; + int curLen = 0; + while(true) { + int availableChars =isCompressedEncoding ? 
remaining() : remaining() / LittleEndian.SHORT_SIZE; + if (requestedLength - curLen <= availableChars) { + // enough space in current record, so just read it out + while(curLen < requestedLength) { + char ch; + if (isCompressedEncoding) { + ch = (char)readUByte(); + } else { + ch = (char)readShort(); + } + buf[curLen] = ch; + curLen++; + } + return new String(buf); + } + // else string has been spilled into next continue record + // so read what's left of the current record + while(availableChars > 0) { + char ch; + if (isCompressedEncoding) { + ch = (char)readUByte(); + } else { + ch = (char)readShort(); + } + buf[curLen] = ch; + curLen++; + availableChars--; + } + if (!isContinueNext()) { + throw new RecordFormatException("Expected to find a ContinueRecord in order to read remaining " + + (requestedLength-curLen) + " of " + requestedLength + " chars"); + } + if(remaining() != 0) { + throw new RecordFormatException("Odd number of bytes(" + remaining() + ") left behind"); + } + nextRecord(); + // note - the compressed flag may change on the fly + byte compressFlag = readByte(); + isCompressedEncoding = (compressFlag == 0); + } + } /** Returns an excel style unicode string from the bytes reminaing in the record. 
* Note: Unicode strings differ from normal strings due to the addition of diff --git a/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java b/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java index fb3270ae7..5fb6f4aa0 100755 --- a/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java +++ b/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java @@ -84,6 +84,7 @@ public final class AllRecordTests { result.addTestSuite(TestPaneRecord.class); result.addTestSuite(TestPlotAreaRecord.class); result.addTestSuite(TestPlotGrowthRecord.class); + result.addTestSuite(TestRecordInputStream.class); result.addTestSuite(TestRecordFactory.class); result.addTestSuite(TestSCLRecord.class); result.addTestSuite(TestSSTDeserializer.class); diff --git a/src/testcases/org/apache/poi/hssf/record/TestRecordInputStream.java b/src/testcases/org/apache/poi/hssf/record/TestRecordInputStream.java new file mode 100644 index 000000000..2b236e1ac --- /dev/null +++ b/src/testcases/org/apache/poi/hssf/record/TestRecordInputStream.java @@ -0,0 +1,97 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hssf.record; + +import org.apache.poi.util.HexRead; + +import junit.framework.AssertionFailedError; +import junit.framework.TestCase; + +/** + * Tests for {@link RecordInputStream} + * + * @author Josh Micich + */ +public final class TestRecordInputStream extends TestCase { + + /** + * Data inspired by attachment 22626 of bug 45866
+ * A unicode string of 18 chars, with a continue record where the compression flag changes + */ + private static final String HED_DUMP1 = "" + + "1A 59 00 8A 9E 8A " // 3 uncompressed unicode chars + + "3C 00 " // Continue sid + + "10 00 " // rec size 16 (1+15) + + "00" // next chunk is compressed + + "20 2D 20 4D 75 6C 74 69 6C 69 6E 67 75 61 6C " // 15 chars + ; + /** + * same string re-arranged + */ + private static final String HED_DUMP2 = "" + // 15 chars at end of current record + + "4D 75 6C 74 69 6C 69 6E 67 75 61 6C 20 2D 20" + + "3C 00 " // Continue sid + + "07 00 " // rec size 7 (1+6) + + "01" // this bit uncompressed + + "1A 59 00 8A 9E 8A " // 3 uncompressed unicode chars + ; + public void testChangeOfCompressionFlag_bug25866() { + byte[] changingFlagSimpleData = HexRead.readFromString("" + + "AA AA " // fake SID + + "06 00 " // first rec len 6 + + HED_DUMP1 + ); + RecordInputStream in = TestcaseRecordInputStream.create(changingFlagSimpleData); + String actual; + try { + actual = in.readUnicodeLEString(18); + } catch (IllegalArgumentException e) { + if ("compressByte in continue records must be 1 while reading unicode LE string".equals(e.getMessage())) { + throw new AssertionFailedError("Identified bug 45866"); + } + + throw e; + } + assertEquals("\u591A\u8A00\u8A9E - Multilingual", actual); + } + + public void testChangeFromUnCompressedToCompressed() { + byte[] changingFlagSimpleData = HexRead.readFromString("" + + "AA AA " // fake SID + + "0F 00 " // first rec len 15 + + HED_DUMP2 + ); + RecordInputStream in = TestcaseRecordInputStream.create(changingFlagSimpleData); + String actual = in.readCompressedUnicode(18); + assertEquals("Multilingual - \u591A\u8A00\u8A9E", actual); + } + + public void testReadString() { + byte[] changingFlagFullData = HexRead.readFromString("" + + "AA AA " // fake SID + + "12 00 " // first rec len 18 (15 + next 3 bytes) + + "12 00 " // total chars 18 + + "00 " // this bit compressed + + HED_DUMP2 + ); + RecordInputStream in 
= TestcaseRecordInputStream.create(changingFlagFullData); + String actual = in.readString(); + assertEquals("Multilingual - \u591A\u8A00\u8A9E", actual); + } +} diff --git a/src/testcases/org/apache/poi/hssf/record/TestUnicodeString.java b/src/testcases/org/apache/poi/hssf/record/TestUnicodeString.java index e449f167d..8f3c65be4 100755 --- a/src/testcases/org/apache/poi/hssf/record/TestUnicodeString.java +++ b/src/testcases/org/apache/poi/hssf/record/TestUnicodeString.java @@ -1,4 +1,3 @@ - /* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -19,6 +18,8 @@ package org.apache.poi.hssf.record; +import org.apache.poi.util.HexRead; + import junit.framework.TestCase; /** @@ -26,18 +27,10 @@ import junit.framework.TestCase; * * @author Jason Height (jheight at apache.org) */ -public class TestUnicodeString - extends TestCase -{ +public final class TestUnicodeString extends TestCase { - public TestUnicodeString( String s ) - { - super( s ); - } - public void testSmallStringSize() - throws Exception - { + public void testSmallStringSize() { //Test a basic string UnicodeString s = makeUnicodeString("Test"); UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats(); @@ -80,9 +73,7 @@ public class TestUnicodeString assertEquals(30, stats.recordSize); } - public void testPerfectStringSize() - throws Exception - { + public void testPerfectStringSize() { //Test a basic string UnicodeString s = makeUnicodeString(SSTRecord.MAX_RECORD_SIZE-2-1); UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats(); @@ -99,9 +90,7 @@ public class TestUnicodeString assertEquals(SSTRecord.MAX_RECORD_SIZE-1, stats.recordSize); } - public void testPerfectRichStringSize() - throws Exception - { + public void testPerfectRichStringSize() { //Test a rich text string UnicodeString s = 
makeUnicodeString(SSTRecord.MAX_RECORD_SIZE-2-1-8-2); s.addFormatRun(new UnicodeString.FormatRun((short)1,(short)0)); @@ -123,7 +112,7 @@ public class TestUnicodeString assertEquals(SSTRecord.MAX_RECORD_SIZE-1, stats.recordSize); } - public void testContinuedStringSize() throws Exception { + public void testContinuedStringSize() { //Test a basic string UnicodeString s = makeUnicodeString(SSTRecord.MAX_RECORD_SIZE-2-1+20); UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats(); @@ -132,7 +121,7 @@ public class TestUnicodeString } /** Tests that a string size calculation that fits neatly in two records, the second being a continue*/ - public void testPerfectContinuedStringSize() throws Exception { + public void testPerfectContinuedStringSize() { //Test a basic string int strSize = SSTRecord.MAX_RECORD_SIZE*2; //String overhead @@ -150,19 +139,18 @@ public class TestUnicodeString - private UnicodeString makeUnicodeString( String s ) + private static UnicodeString makeUnicodeString( String s ) { UnicodeString st = new UnicodeString(s); st.setOptionFlags((byte)0); return st; } - private UnicodeString makeUnicodeString( int numChars) { + private static UnicodeString makeUnicodeString( int numChars) { StringBuffer b = new StringBuffer(numChars); for (int i=0;i