Fixed the double-byte bugs in SSTDeserializer. The test cases provided in bugs 15556 and 22742 now work.

A patch for the rel 2.0 branch will follow shortly.

PR: 15556, 22742
Obtained from:
Submitted by:
Reviewed by:


git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353395 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jason Height 2003-10-14 06:54:00 +00:00
parent fc5ca16d35
commit dc44a8bd39
4 changed files with 55 additions and 40 deletions

View File

@ -62,13 +62,14 @@ import org.apache.poi.util.LittleEndianConsts;
* Handles the task of deserializing a SST string. The two main entry points are * Handles the task of deserializing a SST string. The two main entry points are
* *
* @author Glen Stampoultzis (glens at apache.org) * @author Glen Stampoultzis (glens at apache.org)
* @author Jason Height (jheight at apache.org)
*/ */
class SSTDeserializer class SSTDeserializer
{ {
private BinaryTree strings; private BinaryTree strings;
/** this is the number of characters we expect in the first sub-record in a subsequent continuation record */ /** this is the number of characters that have been read prior to the continuation */
private int continuationExpectedChars; private int continuationReadChars;
/** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */ /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
private String unfinishedString; private String unfinishedString;
/** this is true if the string uses wide characters */ /** this is true if the string uses wide characters */
@ -82,6 +83,7 @@ class SSTDeserializer
/** Number of characters in current string */ /** Number of characters in current string */
private int charCount; private int charCount;
private int extensionLength; private int extensionLength;
private int continueSkipBytes = 0;
public SSTDeserializer( BinaryTree strings ) public SSTDeserializer( BinaryTree strings )
@ -93,13 +95,14 @@ class SSTDeserializer
private void initVars() private void initVars()
{ {
runCount = 0; runCount = 0;
continuationExpectedChars = 0; continuationReadChars = 0;
unfinishedString = ""; unfinishedString = "";
// bytesInCurrentSegment = 0; // bytesInCurrentSegment = 0;
// stringDataOffset = 0; // stringDataOffset = 0;
wideChar = false; wideChar = false;
richText = false; richText = false;
extendedText = false; extendedText = false;
continueSkipBytes = 0;
} }
/** /**
@ -107,14 +110,15 @@ class SSTDeserializer
* strings may span across multiple continuations. Read the SST record * strings may span across multiple continuations. Read the SST record
* carefully before beginning to hack. * carefully before beginning to hack.
*/ */
public void manufactureStrings( final byte[] data, final int initialOffset, short dataSize ) public void manufactureStrings( final byte[] data, final int initialOffset)
{ {
initVars(); initVars();
int offset = initialOffset; int offset = initialOffset;
while ( ( offset - initialOffset ) < dataSize ) final int dataSize = data.length;
while ( offset < dataSize )
{ {
int remaining = dataSize - offset + initialOffset; int remaining = dataSize - offset;
if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) ) if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) )
{ {
@ -122,26 +126,31 @@ class SSTDeserializer
} }
if ( remaining == LittleEndianConsts.SHORT_SIZE ) if ( remaining == LittleEndianConsts.SHORT_SIZE )
{ {
setContinuationExpectedChars( LittleEndian.getUShort( data, offset ) ); //JMH Dont know about this
setContinuationCharsRead( 0 );//LittleEndian.getUShort( data, offset ) );
unfinishedString = ""; unfinishedString = "";
break; break;
} }
charCount = LittleEndian.getUShort( data, offset ); charCount = LittleEndian.getUShort( data, offset );
int charsRead = charCount;
readStringHeader( data, offset ); readStringHeader( data, offset );
boolean stringContinuesOverContinuation = remaining < totalStringSize(); boolean stringContinuesOverContinuation = remaining < totalStringSize();
if ( stringContinuesOverContinuation ) if ( stringContinuesOverContinuation )
{ {
int remainingBytes = ( initialOffset + dataSize ) - offset - stringHeaderOverhead(); int remainingBytes = dataSize - offset - stringHeaderOverhead();
setContinuationExpectedChars( charCount - calculateCharCount( remainingBytes ) ); //Only read the size of the string or whatever is left before the
charCount -= getContinuationExpectedChars(); //continuation
charsRead = Math.min(charsRead, calculateCharCount( remainingBytes ));
setContinuationCharsRead( charsRead );
if (charsRead == charCount) {
//Since all of the characters will have been read, but the entire string (including formatting runs etc)
//hasnt, Compute the number of bytes to skip when the continue record starts
continueSkipBytes = offsetForContinuedRecord(0) - (remainingBytes - calculateByteCount(charsRead));
} }
else
{
setContinuationExpectedChars( 0 );
} }
processString( data, offset, charCount ); processString( data, offset, charsRead );
offset += totalStringSize(); offset += totalStringSize();
if ( getContinuationExpectedChars() != 0 ) if ( stringContinuesOverContinuation )
{ {
break; break;
} }
@ -222,6 +231,7 @@ class SSTDeserializer
UnicodeString string = new UnicodeString( UnicodeString.sid, UnicodeString string = new UnicodeString( UnicodeString.sid,
(short) unicodeStringBuffer.length, (short) unicodeStringBuffer.length,
unicodeStringBuffer ); unicodeStringBuffer );
setContinuationCharsRead( calculateCharCount(bytesRead));
if ( isStringFinished() ) if ( isStringFinished() )
{ {
@ -238,7 +248,7 @@ class SSTDeserializer
private boolean isStringFinished() private boolean isStringFinished()
{ {
return getContinuationExpectedChars() == 0; return getContinuationCharsRead() == charCount;
} }
/** /**
@ -300,8 +310,9 @@ class SSTDeserializer
{ {
if ( isStringFinished() ) if ( isStringFinished() )
{ {
final int offset = continueSkipBytes;
initVars(); initVars();
manufactureStrings( record, 0, (short) record.length ); manufactureStrings( record, offset);
} }
else else
{ {
@ -329,13 +340,12 @@ class SSTDeserializer
*/ */
private void readStringRemainder( final byte[] record ) private void readStringRemainder( final byte[] record )
{ {
int stringRemainderSizeInBytes = calculateByteCount( getContinuationExpectedChars() ); int stringRemainderSizeInBytes = calculateByteCount( charCount-getContinuationCharsRead() );
// stringDataOffset = LittleEndianConsts.BYTE_SIZE;
byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD
+ calculateByteCount( getContinuationExpectedChars() )]; + stringRemainderSizeInBytes];
// write the string length // write the string length
LittleEndian.putShort( unicodeStringData, 0, (short) getContinuationExpectedChars() ); LittleEndian.putShort( unicodeStringData, 0, (short) (charCount-getContinuationCharsRead()) );
// write the options flag // write the options flag
unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText ); unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText );
@ -344,7 +354,7 @@ class SSTDeserializer
// past all the overhead of the str_data array // past all the overhead of the str_data array
arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData, arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData,
SSTRecord.STRING_MINIMAL_OVERHEAD, SSTRecord.STRING_MINIMAL_OVERHEAD,
unicodeStringData.length - SSTRecord.STRING_MINIMAL_OVERHEAD ); stringRemainderSizeInBytes );
// use special constructor to create the final string // use special constructor to create the final string
UnicodeString string = new UnicodeString( UnicodeString.sid, UnicodeString string = new UnicodeString( UnicodeString.sid,
@ -355,7 +365,7 @@ class SSTDeserializer
addToStringTable( strings, integer, string ); addToStringTable( strings, integer, string );
int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes ); int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes );
manufactureStrings( record, newOffset, (short) ( record.length - newOffset ) ); manufactureStrings( record, newOffset);
} }
/** /**
@ -387,8 +397,12 @@ class SSTDeserializer
private int offsetForContinuedRecord( int stringRemainderSizeInBytes ) private int offsetForContinuedRecord( int stringRemainderSizeInBytes )
{ {
return stringRemainderSizeInBytes + LittleEndianConsts.BYTE_SIZE int offset = stringRemainderSizeInBytes + runCount * LittleEndianConsts.INT_SIZE + extensionLength;
+ runCount * LittleEndianConsts.INT_SIZE + extensionLength; if (stringRemainderSizeInBytes != 0)
//If a portion of the string remains then the wideChar options byte is repeated,
//so need to skip this.
offset += + LittleEndianConsts.BYTE_SIZE;
return offset;
} }
private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast ) private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast )
@ -408,17 +422,18 @@ class SSTDeserializer
int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE; int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE;
byte[] unicodeStringData = new byte[record.length + LittleEndianConsts.SHORT_SIZE]; byte[] unicodeStringData = new byte[record.length + LittleEndianConsts.SHORT_SIZE];
LittleEndian.putShort( unicodeStringData, (byte) 0, (short) calculateCharCount( dataLengthInBytes ) ); int charsRead = calculateCharCount( dataLengthInBytes );
LittleEndian.putShort( unicodeStringData, (byte) 0, (short) charsRead );
arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE, record.length ); arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE, record.length );
UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData ); UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData );
unfinishedString = unfinishedString + ucs.getString(); unfinishedString = unfinishedString + ucs.getString();
setContinuationExpectedChars( getContinuationExpectedChars() - calculateCharCount( dataLengthInBytes ) ); setContinuationCharsRead( charsRead );
} }
private boolean stringSpansContinuation( int continuationSizeInBytes ) private boolean stringSpansContinuation( int continuationSizeInBytes )
{ {
return calculateByteCount( getContinuationExpectedChars() ) > continuationSizeInBytes; return calculateByteCount( charCount - getContinuationCharsRead() ) > continuationSizeInBytes;
} }
/** /**
@ -426,14 +441,14 @@ class SSTDeserializer
* sub-record in a subsequent continuation record * sub-record in a subsequent continuation record
*/ */
int getContinuationExpectedChars() int getContinuationCharsRead()
{ {
return continuationExpectedChars; return continuationReadChars;
} }
private void setContinuationExpectedChars( final int count ) private void setContinuationCharsRead( final int count )
{ {
continuationExpectedChars = count; continuationReadChars = count;
} }
private int calculateByteCount( final int character_count ) private int calculateByteCount( final int character_count )

View File

@ -482,7 +482,7 @@ public class SSTRecord
field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset ); field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
field_3_strings = new BinaryTree(); field_3_strings = new BinaryTree();
deserializer = new SSTDeserializer(field_3_strings); deserializer = new SSTDeserializer(field_3_strings);
deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) ); deserializer.manufactureStrings( data, 8 + offset);
} }

View File

@ -88,7 +88,7 @@ public class TestSSTDeserializer
byte[] bytes = HexRead.readData( _test_file_path + File.separator + "richtextdata.txt", "header" ); byte[] bytes = HexRead.readData( _test_file_path + File.separator + "richtextdata.txt", "header" );
BinaryTree strings = new BinaryTree(); BinaryTree strings = new BinaryTree();
SSTDeserializer deserializer = new SSTDeserializer( strings ); SSTDeserializer deserializer = new SSTDeserializer( strings );
deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); deserializer.manufactureStrings( bytes, 0);
byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "richtextdata.txt", "continue1" ); byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "richtextdata.txt", "continue1" );
deserializer.processContinueRecord( continueBytes ); deserializer.processContinueRecord( continueBytes );
@ -101,7 +101,7 @@ public class TestSSTDeserializer
byte[] bytes = HexRead.readData( _test_file_path + File.separator + "evencontinuation.txt", "header" ); byte[] bytes = HexRead.readData( _test_file_path + File.separator + "evencontinuation.txt", "header" );
BinaryTree strings = new BinaryTree(); BinaryTree strings = new BinaryTree();
SSTDeserializer deserializer = new SSTDeserializer( strings ); SSTDeserializer deserializer = new SSTDeserializer( strings );
deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); deserializer.manufactureStrings( bytes, 0);
byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "evencontinuation.txt", "continue1" ); byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "evencontinuation.txt", "continue1" );
deserializer.processContinueRecord( continueBytes ); deserializer.processContinueRecord( continueBytes );
@ -119,7 +119,7 @@ public class TestSSTDeserializer
byte[] bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "header" ); byte[] bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "header" );
BinaryTree strings = new BinaryTree(); BinaryTree strings = new BinaryTree();
SSTDeserializer deserializer = new SSTDeserializer( strings ); SSTDeserializer deserializer = new SSTDeserializer( strings );
deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); deserializer.manufactureStrings( bytes, 0);
bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "continue1" ); bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "continue1" );
deserializer.processContinueRecord( bytes ); deserializer.processContinueRecord( bytes );
bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "continue2" ); bytes = HexRead.readData( _test_file_path + File.separator + "stringacross2continuations.txt", "continue2" );
@ -136,7 +136,7 @@ public class TestSSTDeserializer
byte[] bytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "rich-header" ); byte[] bytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "rich-header" );
BinaryTree strings = new BinaryTree(); BinaryTree strings = new BinaryTree();
SSTDeserializer deserializer = new SSTDeserializer( strings ); SSTDeserializer deserializer = new SSTDeserializer( strings );
deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); deserializer.manufactureStrings( bytes, 0);
byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "rich-continue1" ); byte[] continueBytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "rich-continue1" );
deserializer.processContinueRecord( continueBytes ); deserializer.processContinueRecord( continueBytes );
@ -146,7 +146,7 @@ public class TestSSTDeserializer
bytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "norich-header" ); bytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "norich-header" );
strings = new BinaryTree(); strings = new BinaryTree();
deserializer = new SSTDeserializer( strings ); deserializer = new SSTDeserializer( strings );
deserializer.manufactureStrings( bytes, 0, (short)bytes.length ); deserializer.manufactureStrings( bytes, 0);
continueBytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "norich-continue1" ); continueBytes = HexRead.readData( _test_file_path + File.separator + "extendedtextstrings.txt", "norich-continue1" );
deserializer.processContinueRecord( continueBytes ); deserializer.processContinueRecord( continueBytes );

View File

@ -429,7 +429,7 @@ public class TestSSTRecord
assertEquals( 1464, record.getNumStrings() ); assertEquals( 1464, record.getNumStrings() );
assertEquals( 688, record.getNumUniqueStrings() ); assertEquals( 688, record.getNumUniqueStrings() );
assertEquals( 492, record.countStrings() ); assertEquals( 492, record.countStrings() );
assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() ); //jmh assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() );
assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai", assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai",
record.getDeserializer().getUnfinishedString() ); record.getDeserializer().getUnfinishedString() );
// assertEquals( 52, record.getDeserializer().getTotalLength() ); // assertEquals( 52, record.getDeserializer().getTotalLength() );
@ -448,7 +448,7 @@ public class TestSSTRecord
assertEquals( 0, record.getNumStrings() ); assertEquals( 0, record.getNumStrings() );
assertEquals( 0, record.getNumUniqueStrings() ); assertEquals( 0, record.getNumUniqueStrings() );
assertEquals( 0, record.countStrings() ); assertEquals( 0, record.countStrings() );
assertEquals( 0, record.getDeserializer().getContinuationExpectedChars() ); assertEquals( 0, record.getDeserializer().getContinuationCharsRead() );
assertEquals( "", record.getDeserializer().getUnfinishedString() ); assertEquals( "", record.getDeserializer().getUnfinishedString() );
// assertEquals( 0, record.getDeserializer().getTotalLength() ); // assertEquals( 0, record.getDeserializer().getTotalLength() );
// assertEquals( 0, record.getDeserializer().getStringDataOffset() ); // assertEquals( 0, record.getDeserializer().getStringDataOffset() );