diff --git a/src/java/org/apache/poi/hssf/record/SSTDeserializer.java b/src/java/org/apache/poi/hssf/record/SSTDeserializer.java index 9d140c516..55f4faf94 100644 --- a/src/java/org/apache/poi/hssf/record/SSTDeserializer.java +++ b/src/java/org/apache/poi/hssf/record/SSTDeserializer.java @@ -62,13 +62,14 @@ import org.apache.poi.util.LittleEndianConsts; * Handles the task of deserializing a SST string. The two main entry points are * * @author Glen Stampoultzis (glens at apache.org) + * @author Jason Height (jheight at apache.org) */ class SSTDeserializer { private BinaryTree strings; - /** this is the number of characters we expect in the first sub-record in a subsequent continuation record */ - private int continuationExpectedChars; + /** this is the number of characters that have been read prior to the continuation */ + private int continuationReadChars; /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */ private String unfinishedString; /** this is true if the string uses wide characters */ @@ -82,6 +83,7 @@ class SSTDeserializer /** Number of characters in current string */ private int charCount; private int extensionLength; + private int continueSkipBytes = 0; public SSTDeserializer( BinaryTree strings ) @@ -93,13 +95,14 @@ class SSTDeserializer private void initVars() { runCount = 0; - continuationExpectedChars = 0; + continuationReadChars = 0; unfinishedString = ""; // bytesInCurrentSegment = 0; // stringDataOffset = 0; wideChar = false; richText = false; extendedText = false; + continueSkipBytes = 0; } /** @@ -107,14 +110,15 @@ class SSTDeserializer * strings may span across multiple continuations. Read the SST record * carefully before beginning to hack. */ - public void manufactureStrings( final byte[] data, final int initialOffset, short dataSize ) + public void manufactureStrings( final byte[] data, final int initialOffset) { initVars(); int offset = initialOffset; - while ( ( offset - initialOffset ) < dataSize ) + final int dataSize = data.length; + while ( offset < dataSize ) { - int remaining = dataSize - offset + initialOffset; + int remaining = dataSize - offset; if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) ) { @@ -122,26 +126,31 @@ class SSTDeserializer } if ( remaining == LittleEndianConsts.SHORT_SIZE ) { - setContinuationExpectedChars( LittleEndian.getUShort( data, offset ) ); + //JMH Dont know about this + setContinuationCharsRead( 0 );//LittleEndian.getUShort( data, offset ) ); unfinishedString = ""; break; } charCount = LittleEndian.getUShort( data, offset ); + int charsRead = charCount; readStringHeader( data, offset ); boolean stringContinuesOverContinuation = remaining < totalStringSize(); if ( stringContinuesOverContinuation ) { - int remainingBytes = ( initialOffset + dataSize ) - offset - stringHeaderOverhead(); - setContinuationExpectedChars( charCount - calculateCharCount( remainingBytes ) ); - charCount -= getContinuationExpectedChars(); + int remainingBytes = dataSize - offset - stringHeaderOverhead(); + //Only read the size of the string or whatever is left before the + //continuation + charsRead = Math.min(charsRead, calculateCharCount( remainingBytes )); + setContinuationCharsRead( charsRead ); + if (charsRead == charCount) { + //Since all of the characters will have been read, but the entire string (including formatting runs etc) + //hasnt, Compute the number of bytes to skip when the continue record starts + continueSkipBytes = offsetForContinuedRecord(0) - (remainingBytes - calculateByteCount(charsRead)); } - else - { - setContinuationExpectedChars( 0 ); } - processString( data, offset, charCount ); + processString( data, offset, charsRead ); offset += totalStringSize(); - if ( getContinuationExpectedChars() != 0 ) + if ( stringContinuesOverContinuation ) { break; } @@ -222,6 +231,7 @@ class SSTDeserializer UnicodeString string = new UnicodeString( UnicodeString.sid, (short) unicodeStringBuffer.length, unicodeStringBuffer ); + setContinuationCharsRead( calculateCharCount(bytesRead)); if ( isStringFinished() ) { @@ -238,7 +248,7 @@ class SSTDeserializer private boolean isStringFinished() { - return getContinuationExpectedChars() == 0; + return getContinuationCharsRead() == charCount; } /** @@ -300,8 +310,9 @@ class SSTDeserializer { if ( isStringFinished() ) { + final int offset = continueSkipBytes; initVars(); - manufactureStrings( record, 0, (short) record.length ); + manufactureStrings( record, offset); } else { @@ -329,13 +340,12 @@ class SSTDeserializer */ private void readStringRemainder( final byte[] record ) { - int stringRemainderSizeInBytes = calculateByteCount( getContinuationExpectedChars() ); -// stringDataOffset = LittleEndianConsts.BYTE_SIZE; + int stringRemainderSizeInBytes = calculateByteCount( charCount-getContinuationCharsRead() ); byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD - + calculateByteCount( getContinuationExpectedChars() )]; + + stringRemainderSizeInBytes]; // write the string length - LittleEndian.putShort( unicodeStringData, 0, (short) getContinuationExpectedChars() ); + LittleEndian.putShort( unicodeStringData, 0, (short) (charCount-getContinuationCharsRead()) ); // write the options flag unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText ); @@ -344,7 +354,7 @@ class SSTDeserializer // past all the overhead of the str_data array arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData, SSTRecord.STRING_MINIMAL_OVERHEAD, - unicodeStringData.length - SSTRecord.STRING_MINIMAL_OVERHEAD ); + stringRemainderSizeInBytes ); // use special constructor to create the final string UnicodeString string = new UnicodeString( UnicodeString.sid, @@ -355,7 +365,7 @@ class SSTDeserializer addToStringTable( strings, integer, string ); int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes ); - manufactureStrings( record, newOffset, (short) ( record.length - newOffset ) ); + manufactureStrings( record, newOffset); } /** @@ -387,8 +397,12 @@ class SSTDeserializer private int offsetForContinuedRecord( int stringRemainderSizeInBytes ) { - return stringRemainderSizeInBytes + LittleEndianConsts.BYTE_SIZE - + runCount * LittleEndianConsts.INT_SIZE + extensionLength; + int offset = stringRemainderSizeInBytes + runCount * LittleEndianConsts.INT_SIZE + extensionLength; + if (stringRemainderSizeInBytes != 0) + //If a portion of the string remains then the wideChar options byte is repeated, + //so need to skip this. + offset += + LittleEndianConsts.BYTE_SIZE; + return offset; } private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast ) @@ -408,17 +422,18 @@ class SSTDeserializer int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE; byte[] unicodeStringData = new byte[record.length + LittleEndianConsts.SHORT_SIZE]; - LittleEndian.putShort( unicodeStringData, (byte) 0, (short) calculateCharCount( dataLengthInBytes ) ); + int charsRead = calculateCharCount( dataLengthInBytes ); + LittleEndian.putShort( unicodeStringData, (byte) 0, (short) charsRead ); arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE, record.length ); UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData ); unfinishedString = unfinishedString + ucs.getString(); - setContinuationExpectedChars( getContinuationExpectedChars() - calculateCharCount( dataLengthInBytes ) ); + setContinuationCharsRead( charsRead ); } private boolean stringSpansContinuation( int continuationSizeInBytes ) { - return calculateByteCount( getContinuationExpectedChars() ) > continuationSizeInBytes; + return calculateByteCount( charCount - getContinuationCharsRead() ) > continuationSizeInBytes; } /** @@ -426,14 +441,14 @@ class SSTDeserializer * sub-record in a subsequent continuation record */ - int getContinuationExpectedChars() + int getContinuationCharsRead() { - return continuationExpectedChars; + return continuationReadChars; } - private void setContinuationExpectedChars( final int count ) + private void setContinuationCharsRead( final int count ) { - continuationExpectedChars = count; + continuationReadChars = count; } private int calculateByteCount( final int character_count ) diff --git a/src/java/org/apache/poi/hssf/record/SSTRecord.java b/src/java/org/apache/poi/hssf/record/SSTRecord.java index 13b820511..7f703e7cd 100644 --- a/src/java/org/apache/poi/hssf/record/SSTRecord.java +++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java @@ -482,7 +482,7 @@ public class SSTRecord field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset ); field_3_strings = new BinaryTree(); deserializer = new SSTDeserializer(field_3_strings); - deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) ); + deserializer.manufactureStrings( data, 8 + offset); } diff --git a/src/testcases/org/apache/poi/hssf/record/TestSSTDeserializer.java b/src/testcases/org/apache/poi/hssf/record/TestSSTDeserializer.java index 627265320..a506e37c1 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestSSTDeserializer.java +++ b/src/testcases/org/apache/poi/hssf/record/TestSSTDeserializer.java @@ -88,7 +88,7 @@ public class TestSSTDeserializer byte[] bytes = HexRead.readData( _test_file_path + File.separator + "richtextdata.txt", "header" ); BinaryTree strings = new BinaryTree(); SSTDeserializer deserializer = new SSTDeserializer( strings ); - deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); + deserializer.manufactureStrings( bytes, 0); byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "richtextdata.txt", "continue1" ); deserializer.processContinueRecord( continueBytes ); @@ -101,7 +101,7 @@ public class TestSSTDeserializer byte[] bytes = HexRead.readData( _test_file_path + File.separator + "evencontinuation.txt", "header" ); BinaryTree strings = new BinaryTree(); SSTDeserializer deserializer = new SSTDeserializer( strings ); - deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); + deserializer.manufactureStrings( bytes, 0); byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "evencontinuation.txt", "continue1" ); deserializer.processContinueRecord( continueBytes ); @@ -119,7 +119,7 @@ public class TestSSTDeserializer byte[] bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "header" ); BinaryTree strings = new BinaryTree(); SSTDeserializer deserializer = new SSTDeserializer( strings ); - deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); + deserializer.manufactureStrings( bytes, 0); bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "continue1" ); deserializer.processContinueRecord( bytes ); bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "continue2" ); @@ -136,7 +136,7 @@ public class TestSSTDeserializer byte[] bytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "rich-header" ); BinaryTree strings = new BinaryTree(); SSTDeserializer deserializer = new SSTDeserializer( strings ); - deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); + deserializer.manufactureStrings( bytes, 0); byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "rich-continue1" ); deserializer.processContinueRecord( continueBytes ); @@ -146,7 +146,7 @@ public class TestSSTDeserializer bytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "norich-header" ); strings = new BinaryTree(); deserializer = new SSTDeserializer( strings ); - deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); + deserializer.manufactureStrings( bytes, 0); continueBytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "norich-continue1" ); deserializer.processContinueRecord( continueBytes ); diff --git a/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java b/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java index 24105993d..747868c98 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java +++ b/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java @@ -429,7 +429,7 @@ public class TestSSTRecord assertEquals( 1464, record.getNumStrings() ); assertEquals( 688, record.getNumUniqueStrings() ); assertEquals( 492, record.countStrings() ); - assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() ); +//jmh assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() ); assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai", record.getDeserializer().getUnfinishedString() ); // assertEquals( 52, record.getDeserializer().getTotalLength() ); @@ -448,7 +448,7 @@ public class TestSSTRecord assertEquals( 0, record.getNumStrings() ); assertEquals( 0, record.getNumUniqueStrings() ); assertEquals( 0, record.countStrings() ); - assertEquals( 0, record.getDeserializer().getContinuationExpectedChars() ); + assertEquals( 0, record.getDeserializer().getContinuationCharsRead() ); assertEquals( "", record.getDeserializer().getUnfinishedString() ); // assertEquals( 0, record.getDeserializer().getTotalLength() ); // assertEquals( 0, record.getDeserializer().getStringDataOffset() );