SST fixed!!! Yay... Will reliably read in spreadsheets that have rich text or extended text. Code is a bit cleaner now but could still use more improvement. If I have the energy I'll look into it.
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/branches/REL_1_5_BRANCH@352663 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8a4d120c34
commit
d7672fa259
@ -68,7 +68,7 @@
|
|||||||
|
|
||||||
<version major="1"
|
<version major="1"
|
||||||
minor="5"
|
minor="5"
|
||||||
fix ="0"
|
fix ="1"
|
||||||
tag="dev"/>
|
tag="dev"/>
|
||||||
|
|
||||||
<package>org.apache.poi</package>
|
<package>org.apache.poi</package>
|
||||||
|
@ -1,12 +1,68 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
* The Apache Software License, Version 1.1
|
||||||
|
*
|
||||||
|
* Copyright (c) 2002 The Apache Software Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* 3. The end-user documentation included with the redistribution,
|
||||||
|
* if any, must include the following acknowledgment:
|
||||||
|
* "This product includes software developed by the
|
||||||
|
* Apache Software Foundation (http://www.apache.org/)."
|
||||||
|
* Alternately, this acknowledgment may appear in the software itself,
|
||||||
|
* if and wherever such third-party acknowledgments normally appear.
|
||||||
|
*
|
||||||
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||||
|
* "Apache POI" must not be used to endorse or promote products
|
||||||
|
* derived from this software without prior written permission. For
|
||||||
|
* written permission, please contact apache@apache.org.
|
||||||
|
*
|
||||||
|
* 5. Products derived from this software may not be called "Apache",
|
||||||
|
* "Apache POI", nor may "Apache" appear in their name, without
|
||||||
|
* prior written permission of the Apache Software Foundation.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||||
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||||
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*/
|
||||||
|
|
||||||
package org.apache.poi.hssf.record;
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
|
import org.apache.poi.util.BinaryTree;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.LittleEndianConsts;
|
import org.apache.poi.util.LittleEndianConsts;
|
||||||
import org.apache.poi.util.BinaryTree;
|
|
||||||
import org.apache.poi.util.HexDump;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handles the task of deserializing an SST string. The two main entry points are {@link #manufactureStrings} and {@link #processContinueRecord}.
|
||||||
|
*
|
||||||
|
* @author Glen Stampoultzis (glens at apache.org)
|
||||||
|
*/
|
||||||
class SSTDeserializer
|
class SSTDeserializer
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -15,22 +71,35 @@ class SSTDeserializer
|
|||||||
private int continuationExpectedChars;
|
private int continuationExpectedChars;
|
||||||
/** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
|
/** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
|
||||||
private String unfinishedString;
|
private String unfinishedString;
|
||||||
/** this is the total length of the current string being handled */
|
|
||||||
private int totalLengthBytes;
|
|
||||||
/** this is the offset into a string field of the actual string data */
|
|
||||||
private int stringDataOffset;
|
|
||||||
/** this is true if the string uses wide characters */
|
/** this is true if the string uses wide characters */
|
||||||
private boolean wideChar;
|
private boolean wideChar;
|
||||||
|
/** this is true if the string is a rich text string */
|
||||||
|
private boolean richText;
|
||||||
|
/** this is true if the string is a far east string or some other weird string */
|
||||||
|
private boolean extendedText;
|
||||||
|
/** Number of formatting runs in this rich text field */
|
||||||
|
private short runCount;
|
||||||
|
/** Number of characters in current string */
|
||||||
|
private int charCount;
|
||||||
|
private int extensionLength;
|
||||||
|
|
||||||
|
|
||||||
public SSTDeserializer( BinaryTree strings )
|
public SSTDeserializer( BinaryTree strings )
|
||||||
{
|
{
|
||||||
this.strings = strings;
|
this.strings = strings;
|
||||||
setExpectedChars( 0 );
|
initVars();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initVars()
|
||||||
|
{
|
||||||
|
runCount = 0;
|
||||||
|
continuationExpectedChars = 0;
|
||||||
unfinishedString = "";
|
unfinishedString = "";
|
||||||
totalLengthBytes = 0;
|
// bytesInCurrentSegment = 0;
|
||||||
stringDataOffset = 0;
|
// stringDataOffset = 0;
|
||||||
wideChar = false;
|
wideChar = false;
|
||||||
|
richText = false;
|
||||||
|
extendedText = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -38,14 +107,14 @@ class SSTDeserializer
|
|||||||
* strings may span across multiple continuations. Read the SST record
|
* strings may span across multiple continuations. Read the SST record
|
||||||
* carefully before beginning to hack.
|
* carefully before beginning to hack.
|
||||||
*/
|
*/
|
||||||
public void manufactureStrings( final byte[] data, final int index,
|
public void manufactureStrings( final byte[] data, final int initialOffset, short dataSize )
|
||||||
short size )
|
|
||||||
{
|
{
|
||||||
int offset = index;
|
initVars();
|
||||||
|
|
||||||
while ( offset < size )
|
int offset = initialOffset;
|
||||||
|
while ( ( offset - initialOffset ) < dataSize )
|
||||||
{
|
{
|
||||||
int remaining = size - offset;
|
int remaining = dataSize - offset + initialOffset;
|
||||||
|
|
||||||
if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) )
|
if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) )
|
||||||
{
|
{
|
||||||
@ -53,90 +122,38 @@ class SSTDeserializer
|
|||||||
}
|
}
|
||||||
if ( remaining == LittleEndianConsts.SHORT_SIZE )
|
if ( remaining == LittleEndianConsts.SHORT_SIZE )
|
||||||
{
|
{
|
||||||
setExpectedChars( LittleEndian.getUShort( data, offset ) );
|
setContinuationExpectedChars( LittleEndian.getUShort( data, offset ) );
|
||||||
unfinishedString = "";
|
unfinishedString = "";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
short charCount = LittleEndian.getShort( data, offset );
|
charCount = LittleEndian.getUShort( data, offset );
|
||||||
|
readStringHeader( data, offset );
|
||||||
setupStringParameters( data, offset, charCount );
|
boolean stringContinuesOverContinuation = remaining < totalStringSize();
|
||||||
if ( remaining < totalLengthBytes )
|
if ( stringContinuesOverContinuation )
|
||||||
{
|
{
|
||||||
setExpectedChars( calculateCharCount( totalLengthBytes - remaining ) );
|
int remainingBytes = ( initialOffset + dataSize ) - offset - stringHeaderOverhead();
|
||||||
charCount -= getExpectedChars();
|
setContinuationExpectedChars( charCount - calculateCharCount( remainingBytes ) );
|
||||||
totalLengthBytes = remaining;
|
charCount -= getContinuationExpectedChars();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
setExpectedChars( 0 );
|
setContinuationExpectedChars( 0 );
|
||||||
}
|
}
|
||||||
processString( data, offset, charCount );
|
processString( data, offset, charCount );
|
||||||
offset += totalLengthBytes;
|
offset += totalStringSize();
|
||||||
if ( getExpectedChars() != 0 )
|
if ( getContinuationExpectedChars() != 0 )
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// private void dump( final byte[] data, int offset, int length )
|
||||||
/**
|
// {
|
||||||
* Detemines the option types for the string (ie, compressed or uncompressed unicode, rich text string or
|
|
||||||
* plain string etc) and calculates the length and offset for the string.
|
|
||||||
*
|
|
||||||
* @param data
|
|
||||||
* @param index
|
|
||||||
* @param char_count
|
|
||||||
*/
|
|
||||||
private void setupStringParameters( final byte[] data, final int index,
|
|
||||||
final int char_count )
|
|
||||||
{
|
|
||||||
byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
|
|
||||||
|
|
||||||
wideChar = ( optionFlag & 1 ) == 1;
|
|
||||||
boolean farEast = ( optionFlag & 4 ) == 4;
|
|
||||||
boolean richText = ( optionFlag & 8 ) == 8;
|
|
||||||
|
|
||||||
totalLengthBytes = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
|
|
||||||
stringDataOffset = SSTRecord.STRING_MINIMAL_OVERHEAD;
|
|
||||||
if ( richText )
|
|
||||||
{
|
|
||||||
short run_count = LittleEndian.getShort( data, index + stringDataOffset );
|
|
||||||
|
|
||||||
stringDataOffset += LittleEndianConsts.SHORT_SIZE;
|
|
||||||
totalLengthBytes += LittleEndianConsts.SHORT_SIZE + ( LittleEndianConsts.INT_SIZE * run_count );
|
|
||||||
}
|
|
||||||
if ( farEast )
|
|
||||||
{
|
|
||||||
int extension_length = LittleEndian.getInt( data, index + stringDataOffset );
|
|
||||||
|
|
||||||
stringDataOffset += LittleEndianConsts.INT_SIZE;
|
|
||||||
totalLengthBytes += LittleEndianConsts.INT_SIZE + extension_length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private void processString( final byte[] data, final int index,
|
|
||||||
final short char_count )
|
|
||||||
{
|
|
||||||
byte[] stringDataBuffer = new byte[totalLengthBytes];
|
|
||||||
int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
|
|
||||||
byte[] bstring = new byte[length];
|
|
||||||
|
|
||||||
System.arraycopy( data, index, stringDataBuffer, 0, stringDataBuffer.length );
|
|
||||||
int offset = 0;
|
|
||||||
|
|
||||||
LittleEndian.putShort( bstring, offset, char_count );
|
|
||||||
offset += LittleEndianConsts.SHORT_SIZE;
|
|
||||||
bstring[offset] = stringDataBuffer[offset];
|
|
||||||
|
|
||||||
// System.out.println( "offset = " + stringDataOffset );
|
|
||||||
// System.out.println( "length = " + (bstring.length - STRING_MINIMAL_OVERHEAD) );
|
|
||||||
// System.out.println( "src.length = " + str_data.length );
|
|
||||||
// try
|
// try
|
||||||
// {
|
// {
|
||||||
// System.out.println( "----------------------- DUMP -------------------------" );
|
// System.out.println( "------------------- SST DUMP -------------------------" );
|
||||||
// HexDump.dump( stringDataBuffer, (long)stringDataOffset, System.out, 1);
|
// HexDump.dump( (byte[]) data, offset, System.out, offset, length );
|
||||||
// }
|
// }
|
||||||
// catch ( IOException e )
|
// catch ( IOException e )
|
||||||
// {
|
// {
|
||||||
@ -147,34 +164,95 @@ class SSTDeserializer
|
|||||||
// catch ( IllegalArgumentException e )
|
// catch ( IllegalArgumentException e )
|
||||||
// {
|
// {
|
||||||
// }
|
// }
|
||||||
System.arraycopy( stringDataBuffer, stringDataOffset, bstring,
|
// }
|
||||||
SSTRecord.STRING_MINIMAL_OVERHEAD,
|
|
||||||
bstring.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
|
|
||||||
UnicodeString string = new UnicodeString( UnicodeString.sid,
|
|
||||||
(short) bstring.length,
|
|
||||||
bstring );
|
|
||||||
|
|
||||||
if ( getExpectedChars() != 0 )
|
/**
|
||||||
|
* Determines the option types for the string (ie, compressed or uncompressed unicode, rich text string or
|
||||||
|
* plain string etc) and calculates the length and offset for the string.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private void readStringHeader( final byte[] data, final int index )
|
||||||
{
|
{
|
||||||
unfinishedString = string.getString();
|
|
||||||
|
byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
|
||||||
|
|
||||||
|
wideChar = ( optionFlag & 1 ) == 1;
|
||||||
|
extendedText = ( optionFlag & 4 ) == 4;
|
||||||
|
richText = ( optionFlag & 8 ) == 8;
|
||||||
|
runCount = 0;
|
||||||
|
if ( richText )
|
||||||
|
{
|
||||||
|
runCount = LittleEndian.getShort( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD );
|
||||||
}
|
}
|
||||||
else
|
extensionLength = 0;
|
||||||
|
if ( extendedText )
|
||||||
|
{
|
||||||
|
extensionLength = LittleEndian.getInt( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a string or the first part of a string.
|
||||||
|
*
|
||||||
|
* @param characters the number of characters to write.
|
||||||
|
*
|
||||||
|
* @return the number of bytes written.
|
||||||
|
*/
|
||||||
|
private int processString( final byte[] data, final int dataIndex, final int characters )
|
||||||
|
{
|
||||||
|
|
||||||
|
// length is the length we store it as. not the length that is read.
|
||||||
|
int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( characters );
|
||||||
|
byte[] unicodeStringBuffer = new byte[length];
|
||||||
|
|
||||||
|
int offset = 0;
|
||||||
|
|
||||||
|
// Set the length in characters
|
||||||
|
LittleEndian.putUShort( unicodeStringBuffer, offset, characters );
|
||||||
|
offset += LittleEndianConsts.SHORT_SIZE;
|
||||||
|
// Set the option flags
|
||||||
|
unicodeStringBuffer[offset] = data[dataIndex + offset];
|
||||||
|
// Copy in the string data
|
||||||
|
int bytesRead = unicodeStringBuffer.length - SSTRecord.STRING_MINIMAL_OVERHEAD;
|
||||||
|
arraycopy( data, dataIndex + stringHeaderOverhead(), unicodeStringBuffer, SSTRecord.STRING_MINIMAL_OVERHEAD, bytesRead );
|
||||||
|
// Create the unicode string
|
||||||
|
UnicodeString string = new UnicodeString( UnicodeString.sid,
|
||||||
|
(short) unicodeStringBuffer.length,
|
||||||
|
unicodeStringBuffer );
|
||||||
|
|
||||||
|
if ( isStringFinished() )
|
||||||
{
|
{
|
||||||
Integer integer = new Integer( strings.size() );
|
Integer integer = new Integer( strings.size() );
|
||||||
addToStringTable( strings, integer, string );
|
addToStringTable( strings, integer, string );
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unfinishedString = string.getString();
|
||||||
|
}
|
||||||
|
|
||||||
|
return bytesRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isStringFinished()
|
||||||
|
{
|
||||||
|
return getContinuationExpectedChars() == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
|
* Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
|
||||||
* we end up getting duplicate strings. To get around this I'm doing do things: 1. Converting rich
|
* we end up getting duplicate strings. To get around this I'm doing two things: 1. Converting rich
|
||||||
* text to normal text and 2. If there's a duplicate I'm adding a space onto the end. Sneaky perhaps
|
* text to normal text and 2. If there's a duplicate I'm adding a space onto the end. Sneaky perhaps
|
||||||
* but it gets the job done until we can handle this a little better.
|
* but it gets the job done until we can handle this a little better.
|
||||||
*/
|
*/
|
||||||
static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )
|
static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )
|
||||||
{
|
{
|
||||||
|
|
||||||
if ( string.isRichText() )
|
if ( string.isRichText() )
|
||||||
string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
|
string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
|
||||||
|
if ( string.isExtendedText() )
|
||||||
|
string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~4 ) ) );
|
||||||
|
|
||||||
boolean added = false;
|
boolean added = false;
|
||||||
while ( added == false )
|
while ( added == false )
|
||||||
@ -189,14 +267,13 @@ class SSTDeserializer
|
|||||||
string.setString( string.getString() + " " );
|
string.setString( string.getString() + " " );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private int calculateCharCount( final int byte_count )
|
private int calculateCharCount( final int byte_count )
|
||||||
{
|
{
|
||||||
return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE
|
return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE );
|
||||||
: LittleEndianConsts.BYTE_SIZE );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -219,81 +296,129 @@ class SSTDeserializer
|
|||||||
*
|
*
|
||||||
* @param record the Continue record's byte data
|
* @param record the Continue record's byte data
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public void processContinueRecord( final byte[] record )
|
public void processContinueRecord( final byte[] record )
|
||||||
{
|
{
|
||||||
if ( getExpectedChars() == 0 )
|
if ( isStringFinished() )
|
||||||
{
|
{
|
||||||
unfinishedString = "";
|
initVars();
|
||||||
totalLengthBytes = 0;
|
|
||||||
stringDataOffset = 0;
|
|
||||||
wideChar = false;
|
|
||||||
manufactureStrings( record, 0, (short) record.length );
|
manufactureStrings( record, 0, (short) record.length );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int data_length = record.length - LittleEndianConsts.BYTE_SIZE;
|
// reset the wide bit because that can change across a continuation. the fact that it's
|
||||||
|
// actually rich text doesn't change across continuations even though the rich text
|
||||||
|
// may no longer be set in the "new" option flag. confusing huh?
|
||||||
|
wideChar = ( record[0] & 1 ) == 1;
|
||||||
|
|
||||||
if ( calculateByteCount( getExpectedChars() ) > data_length )
|
if ( stringSpansContinuation( record.length - LittleEndianConsts.BYTE_SIZE ) )
|
||||||
{
|
{
|
||||||
|
processEntireContinuation( record );
|
||||||
// create artificial data to create a UnicodeString
|
|
||||||
byte[] input =
|
|
||||||
new byte[record.length + LittleEndianConsts.SHORT_SIZE];
|
|
||||||
short size = (short) ( ( ( record[0] & 1 ) == 1 )
|
|
||||||
? ( data_length / LittleEndianConsts.SHORT_SIZE )
|
|
||||||
: ( data_length / LittleEndianConsts.BYTE_SIZE ) );
|
|
||||||
|
|
||||||
LittleEndian.putShort( input, (byte) 0, size );
|
|
||||||
System.arraycopy( record, 0, input, LittleEndianConsts.SHORT_SIZE, record.length );
|
|
||||||
UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) input.length, input );
|
|
||||||
|
|
||||||
unfinishedString = unfinishedString + ucs.getString();
|
|
||||||
setExpectedChars( getExpectedChars() - size );
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
setupStringParameters( record, -LittleEndianConsts.SHORT_SIZE,
|
readStringRemainder( record );
|
||||||
getExpectedChars() );
|
}
|
||||||
byte[] str_data = new byte[totalLengthBytes];
|
}
|
||||||
int length = SSTRecord.STRING_MINIMAL_OVERHEAD
|
|
||||||
+ ( calculateByteCount( getExpectedChars() ) );
|
|
||||||
byte[] bstring = new byte[length];
|
|
||||||
|
|
||||||
// Copy data from the record into the string
|
}
|
||||||
// buffer. Copy skips the length of a short in the
|
|
||||||
// string buffer, to leave room for the string length.
|
/**
|
||||||
System.arraycopy( record, 0, str_data,
|
* Reads the remainder string and any subsequent strings from the continuation record.
|
||||||
LittleEndianConsts.SHORT_SIZE,
|
*
|
||||||
str_data.length
|
* @param record The entire continuation record data.
|
||||||
- LittleEndianConsts.SHORT_SIZE );
|
*/
|
||||||
|
private void readStringRemainder( final byte[] record )
|
||||||
|
{
|
||||||
|
int stringRemainderSizeInBytes = calculateByteCount( getContinuationExpectedChars() );
|
||||||
|
// stringDataOffset = LittleEndianConsts.BYTE_SIZE;
|
||||||
|
byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD
|
||||||
|
+ calculateByteCount( getContinuationExpectedChars() )];
|
||||||
|
|
||||||
// write the string length
|
// write the string length
|
||||||
LittleEndian.putShort( bstring, 0,
|
LittleEndian.putShort( unicodeStringData, 0, (short) getContinuationExpectedChars() );
|
||||||
(short) getExpectedChars() );
|
|
||||||
|
|
||||||
// write the options flag
|
// write the options flag
|
||||||
bstring[LittleEndianConsts.SHORT_SIZE] =
|
unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText );
|
||||||
str_data[LittleEndianConsts.SHORT_SIZE];
|
|
||||||
|
|
||||||
// copy the bytes/words making up the string; skipping
|
// copy the bytes/words making up the string; skipping
|
||||||
// past all the overhead of the str_data array
|
// past the leading option-flag byte of the record array
|
||||||
System.arraycopy( str_data, stringDataOffset, bstring,
|
arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData,
|
||||||
SSTRecord.STRING_MINIMAL_OVERHEAD,
|
SSTRecord.STRING_MINIMAL_OVERHEAD,
|
||||||
bstring.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
|
unicodeStringData.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
|
||||||
|
|
||||||
// use special constructor to create the final string
|
// use special constructor to create the final string
|
||||||
UnicodeString string =
|
UnicodeString string = new UnicodeString( UnicodeString.sid,
|
||||||
new UnicodeString( UnicodeString.sid,
|
(short) unicodeStringData.length, unicodeStringData,
|
||||||
(short) bstring.length, bstring,
|
|
||||||
unfinishedString );
|
unfinishedString );
|
||||||
Integer integer = new Integer( strings.size() );
|
Integer integer = new Integer( strings.size() );
|
||||||
|
|
||||||
// field_3_strings.put( integer, string );
|
|
||||||
addToStringTable( strings, integer, string );
|
addToStringTable( strings, integer, string );
|
||||||
manufactureStrings( record, totalLengthBytes - LittleEndianConsts.SHORT_SIZE, (short) record.length );
|
|
||||||
|
int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes );
|
||||||
|
manufactureStrings( record, newOffset, (short) ( record.length - newOffset ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the size of the string in bytes based on the character width
|
||||||
|
*/
|
||||||
|
private int stringSizeInBytes()
|
||||||
|
{
|
||||||
|
return calculateByteCount( charCount );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the size of the string in bytes. This figure includes all the
|
||||||
|
* overheads for the string.
|
||||||
|
*/
|
||||||
|
private int totalStringSize()
|
||||||
|
{
|
||||||
|
return stringSizeInBytes()
|
||||||
|
+ stringHeaderOverhead()
|
||||||
|
+ LittleEndianConsts.INT_SIZE * runCount
|
||||||
|
+ extensionLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int stringHeaderOverhead()
|
||||||
|
{
|
||||||
|
return SSTRecord.STRING_MINIMAL_OVERHEAD
|
||||||
|
+ ( richText ? LittleEndianConsts.SHORT_SIZE : 0 )
|
||||||
|
+ ( extendedText ? LittleEndianConsts.INT_SIZE : 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
private int offsetForContinuedRecord( int stringRemainderSizeInBytes )
|
||||||
|
{
|
||||||
|
return stringRemainderSizeInBytes + LittleEndianConsts.BYTE_SIZE
|
||||||
|
+ runCount * LittleEndianConsts.INT_SIZE + extensionLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast )
|
||||||
|
{
|
||||||
|
return (byte) ( ( wideChar ? 1 : 0 ) + ( farEast ? 4 : 0 ) + ( richText ? 8 : 0 ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If the continued record is so long that it spans into the next continue record, then
|
||||||
|
* simply suck the remaining string data into the existing <code>unfinishedString</code>.
|
||||||
|
*
|
||||||
|
* @param record The data from the continuation record.
|
||||||
|
*/
|
||||||
|
private void processEntireContinuation( final byte[] record )
|
||||||
|
{
|
||||||
|
// create artificial data to create a UnicodeString
|
||||||
|
int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE;
|
||||||
|
byte[] unicodeStringData = new byte[record.length + LittleEndianConsts.SHORT_SIZE];
|
||||||
|
|
||||||
|
LittleEndian.putShort( unicodeStringData, (byte) 0, (short) calculateCharCount( dataLengthInBytes ) );
|
||||||
|
arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE, record.length );
|
||||||
|
UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData );
|
||||||
|
|
||||||
|
unfinishedString = unfinishedString + ucs.getString();
|
||||||
|
setContinuationExpectedChars( getContinuationExpectedChars() - calculateCharCount( dataLengthInBytes ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean stringSpansContinuation( int continuationSizeInBytes )
|
||||||
|
{
|
||||||
|
return calculateByteCount( getContinuationExpectedChars() ) > continuationSizeInBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -301,12 +426,12 @@ class SSTDeserializer
|
|||||||
* sub-record in a subsequent continuation record
|
* sub-record in a subsequent continuation record
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int getExpectedChars()
|
int getContinuationExpectedChars()
|
||||||
{
|
{
|
||||||
return continuationExpectedChars;
|
return continuationExpectedChars;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void setExpectedChars( final int count )
|
private void setContinuationExpectedChars( final int count )
|
||||||
{
|
{
|
||||||
continuationExpectedChars = count;
|
continuationExpectedChars = count;
|
||||||
}
|
}
|
||||||
@ -317,37 +442,116 @@ class SSTDeserializer
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copies an array from the specified source array, beginning at the
|
||||||
|
* specified position, to the specified position of the destination array.
|
||||||
|
* A subsequence of array components are copied from the source
|
||||||
|
* array referenced by <code>src</code> to the destination array
|
||||||
|
* referenced by <code>dst</code>. The number of components copied is
|
||||||
|
* equal to the <code>length</code> argument. The components at
|
||||||
|
* positions <code>srcOffset</code> through
|
||||||
|
* <code>srcOffset+length-1</code> in the source array are copied into
|
||||||
|
* positions <code>dstOffset</code> through
|
||||||
|
* <code>dstOffset+length-1</code>, respectively, of the destination
|
||||||
|
* array.
|
||||||
|
* <p>
|
||||||
|
* If the <code>src</code> and <code>dst</code> arguments refer to the
|
||||||
|
* same array object, then the copying is performed as if the
|
||||||
|
* components at positions <code>srcOffset</code> through
|
||||||
|
* <code>srcOffset+length-1</code> were first copied to a temporary
|
||||||
|
* array with <code>length</code> components and then the contents of
|
||||||
|
* the temporary array were copied into positions
|
||||||
|
* <code>dstOffset</code> through <code>dstOffset+length-1</code> of the
|
||||||
|
* destination array.
|
||||||
|
* <p>
|
||||||
|
* If <code>dst</code> is <code>null</code>, then a
|
||||||
|
* <code>NullPointerException</code> is thrown.
|
||||||
|
* <p>
|
||||||
|
* If <code>src</code> is <code>null</code>, then a
|
||||||
|
* <code>NullPointerException</code> is thrown and the destination
|
||||||
|
* array is not modified.
|
||||||
|
* <p>
|
||||||
|
* Otherwise, if any of the following is true, an
|
||||||
|
* <code>ArrayStoreException</code> is thrown and the destination is
|
||||||
|
* not modified:
|
||||||
|
* <ul>
|
||||||
|
* <li>The <code>src</code> argument refers to an object that is not an
|
||||||
|
* array.
|
||||||
|
* <li>The <code>dst</code> argument refers to an object that is not an
|
||||||
|
* array.
|
||||||
|
* <li>The <code>src</code> argument and <code>dst</code> argument refer to
|
||||||
|
* arrays whose component types are different primitive types.
|
||||||
|
* <li>The <code>src</code> argument refers to an array with a primitive
|
||||||
|
* component type and the <code>dst</code> argument refers to an array
|
||||||
|
* with a reference component type.
|
||||||
|
* <li>The <code>src</code> argument refers to an array with a reference
|
||||||
|
* component type and the <code>dst</code> argument refers to an array
|
||||||
|
* with a primitive component type.
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Otherwise, if any of the following is true, an
|
||||||
|
* <code>IndexOutOfBoundsException</code> is
|
||||||
|
* thrown and the destination is not modified:
|
||||||
|
* <ul>
|
||||||
|
* <li>The <code>srcOffset</code> argument is negative.
|
||||||
|
* <li>The <code>dstOffset</code> argument is negative.
|
||||||
|
* <li>The <code>length</code> argument is negative.
|
||||||
|
* <li><code>srcOffset+length</code> is greater than
|
||||||
|
* <code>src.length</code>, the length of the source array.
|
||||||
|
* <li><code>dstOffset+length</code> is greater than
|
||||||
|
* <code>dst.length</code>, the length of the destination array.
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Otherwise, if any actual component of the source array from
|
||||||
|
* position <code>srcOffset</code> through
|
||||||
|
* <code>srcOffset+length-1</code> cannot be converted to the component
|
||||||
|
* type of the destination array by assignment conversion, an
|
||||||
|
* <code>ArrayStoreException</code> is thrown. In this case, let
|
||||||
|
* <b><i>k</i></b> be the smallest nonnegative integer less than
|
||||||
|
* length such that <code>src[srcOffset+</code><i>k</i><code>]</code>
|
||||||
|
* cannot be converted to the component type of the destination
|
||||||
|
* array; when the exception is thrown, source array components from
|
||||||
|
* positions <code>srcOffset</code> through
|
||||||
|
* <code>srcOffset+</code><i>k</i><code>-1</code>
|
||||||
|
* will already have been copied to destination array positions
|
||||||
|
* <code>dstOffset</code> through
|
||||||
|
* <code>dstOffset+</code><i>k</I><code>-1</code> and no other
|
||||||
|
* positions of the destination array will have been modified.
|
||||||
|
* (Because of the restrictions already itemized, this
|
||||||
|
* paragraph effectively applies only to the situation where both
|
||||||
|
* arrays have component types that are reference types.)
|
||||||
|
*
|
||||||
|
* @param src the source array.
|
||||||
|
* @param src_position start position in the source array.
|
||||||
|
* @param dst the destination array.
|
||||||
|
* @param dst_position start position in the destination data.
|
||||||
|
* @param length the number of array elements to be copied.
|
||||||
|
* @exception IndexOutOfBoundsException if copying would cause
|
||||||
|
* access of data outside array bounds.
|
||||||
|
* @exception ArrayStoreException if an element in the <code>src</code>
|
||||||
|
* array could not be stored into the <code>dest</code> array
|
||||||
|
* because of a type mismatch.
|
||||||
|
* @exception NullPointerException if either <code>src</code> or
|
||||||
|
* <code>dst</code> is <code>null</code>.
|
||||||
|
*/
|
||||||
|
private void arraycopy( byte[] src, int src_position,
|
||||||
|
byte[] dst, int dst_position,
|
||||||
|
int length )
|
||||||
|
{
|
||||||
|
System.arraycopy( src, src_position, dst, dst_position, length );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the unfinished string
|
* @return the unfinished string
|
||||||
*/
|
*/
|
||||||
|
|
||||||
String getUnfinishedString()
|
String getUnfinishedString()
|
||||||
{
|
{
|
||||||
return unfinishedString;
|
return unfinishedString;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the total length of the current string
|
|
||||||
*/
|
|
||||||
|
|
||||||
int getTotalLength()
|
|
||||||
{
|
|
||||||
return totalLengthBytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return offset into current string data
|
|
||||||
*/
|
|
||||||
|
|
||||||
int getStringDataOffset()
|
|
||||||
{
|
|
||||||
return stringDataOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if current string uses wide characters
|
* @return true if current string uses wide characters
|
||||||
*/
|
*/
|
||||||
|
|
||||||
boolean isWideChar()
|
boolean isWideChar()
|
||||||
{
|
{
|
||||||
return wideChar;
|
return wideChar;
|
||||||
|
@ -478,7 +478,7 @@ public class SSTRecord
|
|||||||
field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
|
field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
|
||||||
field_3_strings = new BinaryTree();
|
field_3_strings = new BinaryTree();
|
||||||
deserializer = new SSTDeserializer(field_3_strings);
|
deserializer = new SSTDeserializer(field_3_strings);
|
||||||
deserializer.manufactureStrings( data, 8 + offset, size );
|
deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -79,6 +79,7 @@ public class UnicodeString
|
|||||||
private byte field_2_optionflags; // = 0;
|
private byte field_2_optionflags; // = 0;
|
||||||
private String field_3_string; // = null;
|
private String field_3_string; // = null;
|
||||||
private final int RICH_TEXT_BIT = 8;
|
private final int RICH_TEXT_BIT = 8;
|
||||||
|
private final int EXT_BIT = 4;
|
||||||
|
|
||||||
public UnicodeString()
|
public UnicodeString()
|
||||||
{
|
{
|
||||||
@ -364,4 +365,9 @@ public class UnicodeString
|
|||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isExtendedText()
|
||||||
|
{
|
||||||
|
return (getOptionFlags() & EXT_BIT) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -81,6 +81,7 @@ public class HexDump
|
|||||||
* @param stream the OutputStream to which the data is to be
|
* @param stream the OutputStream to which the data is to be
|
||||||
* written
|
* written
|
||||||
* @param index initial index into the byte array
|
* @param index initial index into the byte array
|
||||||
|
* @param length number of bytes to output, starting at index
|
||||||
*
|
*
|
||||||
* @exception IOException is thrown if anything goes wrong writing
|
* @exception IOException is thrown if anything goes wrong writing
|
||||||
* the data to stream
|
* the data to stream
|
||||||
@ -89,9 +90,8 @@ public class HexDump
|
|||||||
* @exception IllegalArgumentException if the output stream is
|
* @exception IllegalArgumentException if the output stream is
|
||||||
* null
|
* null
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public synchronized static void dump(final byte [] data, final long offset,
|
public synchronized static void dump(final byte [] data, final long offset,
|
||||||
final OutputStream stream, final int index)
|
final OutputStream stream, final int index, final int length)
|
||||||
throws IOException, ArrayIndexOutOfBoundsException,
|
throws IOException, ArrayIndexOutOfBoundsException,
|
||||||
IllegalArgumentException
|
IllegalArgumentException
|
||||||
{
|
{
|
||||||
@ -108,9 +108,11 @@ public class HexDump
|
|||||||
long display_offset = offset + index;
|
long display_offset = offset + index;
|
||||||
StringBuffer buffer = new StringBuffer(74);
|
StringBuffer buffer = new StringBuffer(74);
|
||||||
|
|
||||||
for (int j = index; j < data.length; j += 16)
|
|
||||||
|
int data_length = Math.min(data.length,index+length);
|
||||||
|
for (int j = index; j < data_length; j += 16)
|
||||||
{
|
{
|
||||||
int chars_read = data.length - j;
|
int chars_read = data_length - j;
|
||||||
|
|
||||||
if (chars_read > 16)
|
if (chars_read > 16)
|
||||||
{
|
{
|
||||||
@ -146,6 +148,32 @@ public class HexDump
|
|||||||
buffer.setLength(0);
|
buffer.setLength(0);
|
||||||
display_offset += chars_read;
|
display_offset += chars_read;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* dump an array of bytes to an OutputStream
|
||||||
|
*
|
||||||
|
* @param data the byte array to be dumped
|
||||||
|
* @param offset its offset, whatever that might mean
|
||||||
|
* @param stream the OutputStream to which the data is to be
|
||||||
|
* written
|
||||||
|
* @param index initial index into the byte array
|
||||||
|
*
|
||||||
|
* @exception IOException is thrown if anything goes wrong writing
|
||||||
|
* the data to stream
|
||||||
|
* @exception ArrayIndexOutOfBoundsException if the index is
|
||||||
|
* outside the data array's bounds
|
||||||
|
* @exception IllegalArgumentException if the output stream is
|
||||||
|
* null
|
||||||
|
*/
|
||||||
|
|
||||||
|
public synchronized static void dump(final byte [] data, final long offset,
|
||||||
|
final OutputStream stream, final int index)
|
||||||
|
throws IOException, ArrayIndexOutOfBoundsException,
|
||||||
|
IllegalArgumentException
|
||||||
|
{
|
||||||
|
dump(data, offset, stream, index, data.length-index);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String EOL =
|
public static final String EOL =
|
||||||
|
@ -236,13 +236,27 @@ public class LittleEndian
|
|||||||
*
|
*
|
||||||
* @exception ArrayIndexOutOfBoundsException may be thrown
|
* @exception ArrayIndexOutOfBoundsException may be thrown
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public static void putShort(final byte[] data, final int offset,
|
public static void putShort(final byte[] data, final int offset,
|
||||||
final short value)
|
final short value)
|
||||||
{
|
{
|
||||||
putNumber(data, offset, value, SHORT_SIZE);
|
putNumber(data, offset, value, SHORT_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* put an unsigned short value into a byte array
|
||||||
|
*
|
||||||
|
* @param data the byte array
|
||||||
|
* @param offset a starting offset into the byte array
|
||||||
|
* @param value the unsigned short (16-bit) value, passed as an int
|
||||||
|
*
|
||||||
|
* @exception ArrayIndexOutOfBoundsException may be thrown
|
||||||
|
*/
|
||||||
|
public static void putUShort(final byte[] data, final int offset,
|
||||||
|
final int value)
|
||||||
|
{
|
||||||
|
putNumber(data, offset, value, SHORT_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* put an array of shorts into a byte array
|
* put an array of shorts into a byte array
|
||||||
*
|
*
|
||||||
|
16
src/testcases/org/apache/poi/hssf/data/evencontinuation.txt
Normal file
16
src/testcases/org/apache/poi/hssf/data/evencontinuation.txt
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
14 00 # String length 0x14=20
|
||||||
|
01 # Option flag, 16bit
|
||||||
|
# String: At a dinner party or
|
||||||
|
41 00 74 00 20 00 61 00 20 00
|
||||||
|
64 00 69 00 6E 00 6E 00 65 00
|
||||||
|
72 00 20 00 70 00 61 00 72 00
|
||||||
|
74 00 79 00 20 00 6F 00 72 00
|
||||||
|
|
||||||
|
# Continuation record (new string on the boundary)
|
||||||
|
11 00 # String length 0x11=17
|
||||||
|
00 # Option flag, 8bit
|
||||||
|
# String: At a dinner party
|
||||||
|
41 74 20 61 20
|
||||||
|
64 69 6E 6E 65
|
||||||
|
72 20 70 61 72
|
||||||
|
74 79
|
21
src/testcases/org/apache/poi/hssf/data/richtextdata.txt
Normal file
21
src/testcases/org/apache/poi/hssf/data/richtextdata.txt
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
1D 00 # String length 0x1d=29
|
||||||
|
09 # Option flag, rich text + 16bit
|
||||||
|
02 00 # Formatting runs
|
||||||
|
# String: At a dinner party or
|
||||||
|
41 00 74 00 20 00 61 00 20 00
|
||||||
|
64 00 69 00 6E 00 6E 00 65 00
|
||||||
|
72 00 20 00 70 00 61 00 72 00
|
||||||
|
74 00 79 00 20 00 6F 00 72 00
|
||||||
|
|
||||||
|
# Continuation record
|
||||||
|
00 # option flag
|
||||||
|
|
||||||
|
# String: At At At
|
||||||
|
41 74 20
|
||||||
|
41 74 20
|
||||||
|
41 74 20
|
||||||
|
|
||||||
|
00 00 # Formatting run 1, first formatted char at 0
|
||||||
|
00 00 # Formatting run 1, Index to font record
|
||||||
|
02 00 # Formatting run 2, first formatted char at 2
|
||||||
|
00 00 # Formatting run 2, Index to font record
|
@ -0,0 +1,7 @@
|
|||||||
|
14 00 # String length 0x14=20
|
||||||
|
01 # Option flag, 16bit
|
||||||
|
# String: At a dinner party or
|
||||||
|
41 00 74 00 20 00 61 00 20 00
|
||||||
|
64 00 69 00 6E 00 6E 00 65 00
|
||||||
|
72 00 20 00 70 00 61 00 72 00
|
||||||
|
74 00 79 00 20 00 6F 00 72 00
|
@ -0,0 +1,9 @@
|
|||||||
|
|
||||||
|
# Continuation record
|
||||||
|
22 00 # String length 0x22=34
|
||||||
|
00 # Option flag, 8bit
|
||||||
|
# String: At a dinner party
|
||||||
|
41 74 20 61 20
|
||||||
|
64 69 6E 6E 65
|
||||||
|
72 20 70 61 72
|
||||||
|
74 79
|
@ -0,0 +1,7 @@
|
|||||||
|
# Continuation record
|
||||||
|
00 # option flag
|
||||||
|
# String: At a dinner party
|
||||||
|
41 74 20 61 20
|
||||||
|
64 69 6E 6E 65
|
||||||
|
72 20 70 61 72
|
||||||
|
74 79
|
@ -55,19 +55,20 @@
|
|||||||
package org.apache.poi.hssf.record;
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||||
|
import org.apache.poi.util.BinaryTree;
|
||||||
|
import org.apache.poi.util.HexRead;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.LittleEndianConsts;
|
import org.apache.poi.util.LittleEndianConsts;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
|
||||||
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author Marc Johnson (mjohnson at apache dot org)
|
* @author Marc Johnson (mjohnson at apache dot org)
|
||||||
|
* @author Glen Stampoultzis (glens at apache.org)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class TestSSTRecord
|
public class TestSSTRecord
|
||||||
@ -98,14 +99,14 @@ public class TestSSTRecord
|
|||||||
public void testProcessContinueRecord()
|
public void testProcessContinueRecord()
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
byte[] testdata = readTestData( "BigSSTRecord" );
|
byte[] testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord" );
|
||||||
byte[] input = new byte[testdata.length - 4];
|
byte[] input = new byte[testdata.length - 4];
|
||||||
|
|
||||||
System.arraycopy( testdata, 4, input, 0, input.length );
|
System.arraycopy( testdata, 4, input, 0, input.length );
|
||||||
SSTRecord record =
|
SSTRecord record =
|
||||||
new SSTRecord( LittleEndian.getShort( testdata, 0 ),
|
new SSTRecord( LittleEndian.getShort( testdata, 0 ),
|
||||||
LittleEndian.getShort( testdata, 2 ), input );
|
LittleEndian.getShort( testdata, 2 ), input );
|
||||||
byte[] continueRecord = readTestData( "BigSSTRecordCR" );
|
byte[] continueRecord = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecordCR" );
|
||||||
|
|
||||||
input = new byte[continueRecord.length - 4];
|
input = new byte[continueRecord.length - 4];
|
||||||
System.arraycopy( continueRecord, 4, input, 0, input.length );
|
System.arraycopy( continueRecord, 4, input, 0, input.length );
|
||||||
@ -141,42 +142,42 @@ public class TestSSTRecord
|
|||||||
assertEquals( record, testRecord );
|
assertEquals( record, testRecord );
|
||||||
|
|
||||||
// testing based on new bug report
|
// testing based on new bug report
|
||||||
testdata = readTestData( "BigSSTRecord2" );
|
testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2" );
|
||||||
input = new byte[testdata.length - 4];
|
input = new byte[testdata.length - 4];
|
||||||
System.arraycopy( testdata, 4, input, 0, input.length );
|
System.arraycopy( testdata, 4, input, 0, input.length );
|
||||||
record = new SSTRecord( LittleEndian.getShort( testdata, 0 ),
|
record = new SSTRecord( LittleEndian.getShort( testdata, 0 ),
|
||||||
LittleEndian.getShort( testdata, 2 ), input );
|
LittleEndian.getShort( testdata, 2 ), input );
|
||||||
byte[] continueRecord1 = readTestData( "BigSSTRecord2CR1" );
|
byte[] continueRecord1 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR1" );
|
||||||
|
|
||||||
input = new byte[continueRecord1.length - 4];
|
input = new byte[continueRecord1.length - 4];
|
||||||
System.arraycopy( continueRecord1, 4, input, 0, input.length );
|
System.arraycopy( continueRecord1, 4, input, 0, input.length );
|
||||||
record.processContinueRecord( input );
|
record.processContinueRecord( input );
|
||||||
byte[] continueRecord2 = readTestData( "BigSSTRecord2CR2" );
|
byte[] continueRecord2 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR2" );
|
||||||
|
|
||||||
input = new byte[continueRecord2.length - 4];
|
input = new byte[continueRecord2.length - 4];
|
||||||
System.arraycopy( continueRecord2, 4, input, 0, input.length );
|
System.arraycopy( continueRecord2, 4, input, 0, input.length );
|
||||||
record.processContinueRecord( input );
|
record.processContinueRecord( input );
|
||||||
byte[] continueRecord3 = readTestData( "BigSSTRecord2CR3" );
|
byte[] continueRecord3 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR3" );
|
||||||
|
|
||||||
input = new byte[continueRecord3.length - 4];
|
input = new byte[continueRecord3.length - 4];
|
||||||
System.arraycopy( continueRecord3, 4, input, 0, input.length );
|
System.arraycopy( continueRecord3, 4, input, 0, input.length );
|
||||||
record.processContinueRecord( input );
|
record.processContinueRecord( input );
|
||||||
byte[] continueRecord4 = readTestData( "BigSSTRecord2CR4" );
|
byte[] continueRecord4 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR4" );
|
||||||
|
|
||||||
input = new byte[continueRecord4.length - 4];
|
input = new byte[continueRecord4.length - 4];
|
||||||
System.arraycopy( continueRecord4, 4, input, 0, input.length );
|
System.arraycopy( continueRecord4, 4, input, 0, input.length );
|
||||||
record.processContinueRecord( input );
|
record.processContinueRecord( input );
|
||||||
byte[] continueRecord5 = readTestData( "BigSSTRecord2CR5" );
|
byte[] continueRecord5 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR5" );
|
||||||
|
|
||||||
input = new byte[continueRecord5.length - 4];
|
input = new byte[continueRecord5.length - 4];
|
||||||
System.arraycopy( continueRecord5, 4, input, 0, input.length );
|
System.arraycopy( continueRecord5, 4, input, 0, input.length );
|
||||||
record.processContinueRecord( input );
|
record.processContinueRecord( input );
|
||||||
byte[] continueRecord6 = readTestData( "BigSSTRecord2CR6" );
|
byte[] continueRecord6 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR6" );
|
||||||
|
|
||||||
input = new byte[continueRecord6.length - 4];
|
input = new byte[continueRecord6.length - 4];
|
||||||
System.arraycopy( continueRecord6, 4, input, 0, input.length );
|
System.arraycopy( continueRecord6, 4, input, 0, input.length );
|
||||||
record.processContinueRecord( input );
|
record.processContinueRecord( input );
|
||||||
byte[] continueRecord7 = readTestData( "BigSSTRecord2CR7" );
|
byte[] continueRecord7 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR7" );
|
||||||
|
|
||||||
input = new byte[continueRecord7.length - 4];
|
input = new byte[continueRecord7.length - 4];
|
||||||
System.arraycopy( continueRecord7, 4, input, 0, input.length );
|
System.arraycopy( continueRecord7, 4, input, 0, input.length );
|
||||||
@ -208,6 +209,7 @@ public class TestSSTRecord
|
|||||||
}
|
}
|
||||||
assertEquals( offset, ser_output.length );
|
assertEquals( offset, ser_output.length );
|
||||||
assertEquals( record, testRecord );
|
assertEquals( record, testRecord );
|
||||||
|
assertEquals( record.countStrings(), testRecord.countStrings() );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -333,7 +335,6 @@ public class TestSSTRecord
|
|||||||
*
|
*
|
||||||
* @exception IOException
|
* @exception IOException
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public void testSSTRecordBug()
|
public void testSSTRecordBug()
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
@ -366,7 +367,6 @@ public class TestSSTRecord
|
|||||||
/**
|
/**
|
||||||
* test simple addString
|
* test simple addString
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public void testSimpleAddString()
|
public void testSimpleAddString()
|
||||||
{
|
{
|
||||||
SSTRecord record = new SSTRecord();
|
SSTRecord record = new SSTRecord();
|
||||||
@ -420,7 +420,7 @@ public class TestSSTRecord
|
|||||||
public void testReaderConstructor()
|
public void testReaderConstructor()
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
byte[] testdata = readTestData( "BigSSTRecord" );
|
byte[] testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord" );
|
||||||
byte[] input = new byte[testdata.length - 4];
|
byte[] input = new byte[testdata.length - 4];
|
||||||
|
|
||||||
System.arraycopy( testdata, 4, input, 0, input.length );
|
System.arraycopy( testdata, 4, input, 0, input.length );
|
||||||
@ -431,11 +431,11 @@ public class TestSSTRecord
|
|||||||
assertEquals( 1464, record.getNumStrings() );
|
assertEquals( 1464, record.getNumStrings() );
|
||||||
assertEquals( 688, record.getNumUniqueStrings() );
|
assertEquals( 688, record.getNumUniqueStrings() );
|
||||||
assertEquals( 492, record.countStrings() );
|
assertEquals( 492, record.countStrings() );
|
||||||
assertEquals( 1, record.getDeserializer().getExpectedChars() );
|
assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() );
|
||||||
assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai",
|
assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai",
|
||||||
record.getDeserializer().getUnfinishedString() );
|
record.getDeserializer().getUnfinishedString() );
|
||||||
assertEquals( 52, record.getDeserializer().getTotalLength() );
|
// assertEquals( 52, record.getDeserializer().getTotalLength() );
|
||||||
assertEquals( 3, record.getDeserializer().getStringDataOffset() );
|
// assertEquals( 3, record.getDeserializer().getStringDataOffset() );
|
||||||
assertTrue( !record.getDeserializer().isWideChar() );
|
assertTrue( !record.getDeserializer().isWideChar() );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -450,10 +450,10 @@ public class TestSSTRecord
|
|||||||
assertEquals( 0, record.getNumStrings() );
|
assertEquals( 0, record.getNumStrings() );
|
||||||
assertEquals( 0, record.getNumUniqueStrings() );
|
assertEquals( 0, record.getNumUniqueStrings() );
|
||||||
assertEquals( 0, record.countStrings() );
|
assertEquals( 0, record.countStrings() );
|
||||||
assertEquals( 0, record.getDeserializer().getExpectedChars() );
|
assertEquals( 0, record.getDeserializer().getContinuationExpectedChars() );
|
||||||
assertEquals( "", record.getDeserializer().getUnfinishedString() );
|
assertEquals( "", record.getDeserializer().getUnfinishedString() );
|
||||||
assertEquals( 0, record.getDeserializer().getTotalLength() );
|
// assertEquals( 0, record.getDeserializer().getTotalLength() );
|
||||||
assertEquals( 0, record.getDeserializer().getStringDataOffset() );
|
// assertEquals( 0, record.getDeserializer().getStringDataOffset() );
|
||||||
assertTrue( !record.getDeserializer().isWideChar() );
|
assertTrue( !record.getDeserializer().isWideChar() );
|
||||||
byte[] output = record.serialize();
|
byte[] output = record.serialize();
|
||||||
byte[] expected =
|
byte[] expected =
|
||||||
@ -482,99 +482,6 @@ public class TestSSTRecord
|
|||||||
junit.textui.TestRunner.run( TestSSTRecord.class );
|
junit.textui.TestRunner.run( TestSSTRecord.class );
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] readTestData( String filename )
|
|
||||||
throws IOException
|
|
||||||
{
|
|
||||||
File file = new File( _test_file_path
|
|
||||||
+ File.separator
|
|
||||||
+ filename );
|
|
||||||
FileInputStream stream = new FileInputStream( file );
|
|
||||||
int characterCount = 0;
|
|
||||||
byte b = (byte) 0;
|
|
||||||
List bytes = new ArrayList();
|
|
||||||
boolean done = false;
|
|
||||||
|
|
||||||
while ( !done )
|
|
||||||
{
|
|
||||||
int count = stream.read();
|
|
||||||
|
|
||||||
switch ( count )
|
|
||||||
{
|
|
||||||
|
|
||||||
case '0':
|
|
||||||
case '1':
|
|
||||||
case '2':
|
|
||||||
case '3':
|
|
||||||
case '4':
|
|
||||||
case '5':
|
|
||||||
case '6':
|
|
||||||
case '7':
|
|
||||||
case '8':
|
|
||||||
case '9':
|
|
||||||
b <<= 4;
|
|
||||||
b += (byte) ( count - '0' );
|
|
||||||
characterCount++;
|
|
||||||
if ( characterCount == 2 )
|
|
||||||
{
|
|
||||||
bytes.add( new Byte( b ) );
|
|
||||||
characterCount = 0;
|
|
||||||
b = (byte) 0;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 'A':
|
|
||||||
case 'B':
|
|
||||||
case 'C':
|
|
||||||
case 'D':
|
|
||||||
case 'E':
|
|
||||||
case 'F':
|
|
||||||
b <<= 4;
|
|
||||||
b += (byte) ( count + 10 - 'A' );
|
|
||||||
characterCount++;
|
|
||||||
if ( characterCount == 2 )
|
|
||||||
{
|
|
||||||
bytes.add( new Byte( b ) );
|
|
||||||
characterCount = 0;
|
|
||||||
b = (byte) 0;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 'a':
|
|
||||||
case 'b':
|
|
||||||
case 'c':
|
|
||||||
case 'd':
|
|
||||||
case 'e':
|
|
||||||
case 'f':
|
|
||||||
b <<= 4;
|
|
||||||
b += (byte) ( count + 10 - 'a' );
|
|
||||||
characterCount++;
|
|
||||||
if ( characterCount == 2 )
|
|
||||||
{
|
|
||||||
bytes.add( new Byte( b ) );
|
|
||||||
characterCount = 0;
|
|
||||||
b = (byte) 0;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case -1:
|
|
||||||
done = true;
|
|
||||||
break;
|
|
||||||
|
|
||||||
default :
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stream.close();
|
|
||||||
Byte[] polished = (Byte[]) bytes.toArray( new Byte[0] );
|
|
||||||
byte[] rval = new byte[polished.length];
|
|
||||||
|
|
||||||
for ( int j = 0; j < polished.length; j++ )
|
|
||||||
{
|
|
||||||
rval[j] = polished[j].byteValue();
|
|
||||||
}
|
|
||||||
return rval;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that workbooks with rich text that duplicates a non rich text cell can be read and written.
|
* Tests that workbooks with rich text that duplicates a non rich text cell can be read and written.
|
||||||
*/
|
*/
|
||||||
@ -616,4 +523,52 @@ public class TestSSTRecord
|
|||||||
outStream.close();
|
outStream.close();
|
||||||
file.delete();
|
file.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSpanRichTextToPlainText()
|
||||||
|
throws Exception
|
||||||
|
{
|
||||||
|
byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "richtextdata.txt" );
|
||||||
|
BinaryTree strings = new BinaryTree();
|
||||||
|
SSTDeserializer deserializer = new SSTDeserializer( strings );
|
||||||
|
deserializer.manufactureStrings( bytes, 0, (short) 45 );
|
||||||
|
byte[] continueBytes = new byte[bytes.length - 45];
|
||||||
|
System.arraycopy( bytes, 45, continueBytes, 0, bytes.length - 45 );
|
||||||
|
deserializer.processContinueRecord( continueBytes );
|
||||||
|
// System.out.println( "strings.getKeyForValue(new Integer(0)) = " + strings.get( new Integer( 0 ) ) );
|
||||||
|
|
||||||
|
assertEquals( "At a dinner party orAt At At ", strings.get( new Integer( 0 ) ) + "" );
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testContinuationWithNoOverlap()
|
||||||
|
throws Exception
|
||||||
|
{
|
||||||
|
byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "evencontinuation.txt" );
|
||||||
|
BinaryTree strings = new BinaryTree();
|
||||||
|
SSTDeserializer deserializer = new SSTDeserializer( strings );
|
||||||
|
deserializer.manufactureStrings( bytes, 0, (short) 43 );
|
||||||
|
byte[] continueBytes = new byte[bytes.length - 43];
|
||||||
|
System.arraycopy( bytes, 43, continueBytes, 0, bytes.length - 43 );
|
||||||
|
deserializer.processContinueRecord( continueBytes );
|
||||||
|
|
||||||
|
assertEquals( "At a dinner party or", strings.get( new Integer( 0 ) ) + "" );
|
||||||
|
assertEquals( "At a dinner party", strings.get( new Integer( 1 ) ) + "" );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testStringAcross2Continuations()
|
||||||
|
throws Exception
|
||||||
|
{
|
||||||
|
byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuations.txt" );
|
||||||
|
BinaryTree strings = new BinaryTree();
|
||||||
|
SSTDeserializer deserializer = new SSTDeserializer( strings );
|
||||||
|
deserializer.manufactureStrings( bytes, 0, (short) 43 );
|
||||||
|
bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuationsCR1.txt" );
|
||||||
|
deserializer.processContinueRecord( bytes );
|
||||||
|
bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuationsCR2.txt" );
|
||||||
|
deserializer.processContinueRecord( bytes );
|
||||||
|
|
||||||
|
assertEquals( "At a dinner party or", strings.get( new Integer( 0 ) ) + "" );
|
||||||
|
assertEquals( "At a dinner partyAt a dinner party", strings.get( new Integer( 1 ) ) + "" );
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user