SST fixed!!! Yay... Will reliably read in spreadsheets that have rich text or extended text. Code is a bit cleaner now but could still use more improvement. If I have the energy I'll look into it.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/branches/REL_1_5_BRANCH@352663 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Glen Stampoultzis 2002-06-09 12:33:26 +00:00
parent 8a4d120c34
commit d7672fa259
13 changed files with 1113 additions and 846 deletions

View File

@ -68,8 +68,8 @@
<version major="1"
minor="5"
fix ="0"
tag="dev"/>
fix ="1"
tag="dev"/>
<package>org.apache.poi</package>

View File

@ -1,12 +1,68 @@
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache POI" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache POI", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.poi.hssf.record;
import org.apache.poi.util.BinaryTree;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.BinaryTree;
import org.apache.poi.util.HexDump;
import java.io.IOException;
/**
* Handles the task of deserializing a SST string. The two main entry points are
* {@link #manufactureStrings} and {@link #processContinueRecord}.
*
* @author Glen Stampoultzis (glens at apache.org)
*/
class SSTDeserializer
{
@ -15,22 +71,35 @@ class SSTDeserializer
private int continuationExpectedChars;
/** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
private String unfinishedString;
/** this is the total length of the current string being handled */
private int totalLengthBytes;
/** this is the offset into a string field of the actual string data */
private int stringDataOffset;
/** this is true if the string uses wide characters */
private boolean wideChar;
/** this is true if the string is a rich text string */
private boolean richText;
/** this is true if the string is a far east string or some other weird string */
private boolean extendedText;
/** Number of formatting runs in this rich text field */
private short runCount;
/** Number of characters in current string */
private int charCount;
private int extensionLength;
public SSTDeserializer(BinaryTree strings)
public SSTDeserializer( BinaryTree strings )
{
this.strings = strings;
setExpectedChars( 0 );
initVars();
}
private void initVars()
{
runCount = 0;
continuationExpectedChars = 0;
unfinishedString = "";
totalLengthBytes = 0;
stringDataOffset = 0;
// bytesInCurrentSegment = 0;
// stringDataOffset = 0;
wideChar = false;
richText = false;
extendedText = false;
}
/**
@ -38,14 +107,14 @@ class SSTDeserializer
* strings may span across multiple continuations. Read the SST record
* carefully before beginning to hack.
*/
public void manufactureStrings( final byte[] data, final int index,
short size )
public void manufactureStrings( final byte[] data, final int initialOffset, short dataSize )
{
int offset = index;
initVars();
while ( offset < size )
int offset = initialOffset;
while ( ( offset - initialOffset ) < dataSize )
{
int remaining = size - offset;
int remaining = dataSize - offset + initialOffset;
if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) )
{
@ -53,90 +122,38 @@ class SSTDeserializer
}
if ( remaining == LittleEndianConsts.SHORT_SIZE )
{
setExpectedChars( LittleEndian.getUShort( data, offset ) );
setContinuationExpectedChars( LittleEndian.getUShort( data, offset ) );
unfinishedString = "";
break;
}
short charCount = LittleEndian.getShort( data, offset );
setupStringParameters( data, offset, charCount );
if ( remaining < totalLengthBytes )
charCount = LittleEndian.getUShort( data, offset );
readStringHeader( data, offset );
boolean stringContinuesOverContinuation = remaining < totalStringSize();
if ( stringContinuesOverContinuation )
{
setExpectedChars( calculateCharCount( totalLengthBytes - remaining ) );
charCount -= getExpectedChars();
totalLengthBytes = remaining;
int remainingBytes = ( initialOffset + dataSize ) - offset - stringHeaderOverhead();
setContinuationExpectedChars( charCount - calculateCharCount( remainingBytes ) );
charCount -= getContinuationExpectedChars();
}
else
{
setExpectedChars( 0 );
setContinuationExpectedChars( 0 );
}
processString( data, offset, charCount );
offset += totalLengthBytes;
if ( getExpectedChars() != 0 )
offset += totalStringSize();
if ( getContinuationExpectedChars() != 0 )
{
break;
}
}
}
/**
* Determines the option types for the string (ie, compressed or uncompressed unicode, rich text string or
* plain string etc) and calculates the length and offset for the string.
*
* @param data
* @param index
* @param char_count
*/
private void setupStringParameters( final byte[] data, final int index,
final int char_count )
{
// The option flag byte sits directly after the two-byte character count.
byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
// bit 0x01: wide characters, bit 0x04: far east data present, bit 0x08: rich text
wideChar = ( optionFlag & 1 ) == 1;
boolean farEast = ( optionFlag & 4 ) == 4;
boolean richText = ( optionFlag & 8 ) == 8;
// Start from the minimal header plus the character data; optional fields are added below.
totalLengthBytes = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
stringDataOffset = SSTRecord.STRING_MINIMAL_OVERHEAD;
if ( richText )
{
// A short run count follows the minimal header; each run contributes INT_SIZE bytes
// to the total length.
short run_count = LittleEndian.getShort( data, index + stringDataOffset );
stringDataOffset += LittleEndianConsts.SHORT_SIZE;
totalLengthBytes += LittleEndianConsts.SHORT_SIZE + ( LittleEndianConsts.INT_SIZE * run_count );
}
if ( farEast )
{
// An int extension length comes next (after the run count when one was read);
// the extension bytes themselves are counted in the total length.
int extension_length = LittleEndian.getInt( data, index + stringDataOffset );
stringDataOffset += LittleEndianConsts.INT_SIZE;
totalLengthBytes += LittleEndianConsts.INT_SIZE + extension_length;
}
}
private void processString( final byte[] data, final int index,
final short char_count )
{
byte[] stringDataBuffer = new byte[totalLengthBytes];
int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
byte[] bstring = new byte[length];
System.arraycopy( data, index, stringDataBuffer, 0, stringDataBuffer.length );
int offset = 0;
LittleEndian.putShort( bstring, offset, char_count );
offset += LittleEndianConsts.SHORT_SIZE;
bstring[offset] = stringDataBuffer[offset];
// System.out.println( "offset = " + stringDataOffset );
// System.out.println( "length = " + (bstring.length - STRING_MINIMAL_OVERHEAD) );
// System.out.println( "src.length = " + str_data.length );
// private void dump( final byte[] data, int offset, int length )
// {
// try
// {
// System.out.println( "----------------------- DUMP -------------------------" );
// HexDump.dump( stringDataBuffer, (long)stringDataOffset, System.out, 1);
// System.out.println( "------------------- SST DUMP -------------------------" );
// HexDump.dump( (byte[]) data, offset, System.out, offset, length );
// }
// catch ( IOException e )
// {
@ -147,56 +164,116 @@ class SSTDeserializer
// catch ( IllegalArgumentException e )
// {
// }
System.arraycopy( stringDataBuffer, stringDataOffset, bstring,
SSTRecord.STRING_MINIMAL_OVERHEAD,
bstring.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
UnicodeString string = new UnicodeString( UnicodeString.sid,
(short) bstring.length,
bstring );
// }
if ( getExpectedChars() != 0 )
/**
* Determines the option types for the string (ie, compressed or uncompressed unicode, rich text string or
* plain string etc) and calculates the length and offset for the string.
*
*/
private void readStringHeader( final byte[] data, final int index )
{
    // The option flag byte sits directly after the two-byte character count.
    final byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
    wideChar = ( optionFlag & 1 ) == 1;
    extendedText = ( optionFlag & 4 ) == 4;
    richText = ( optionFlag & 8 ) == 8;

    // Optional header fields start right after the minimal header. When both are present the
    // run count (a short) comes first and the extension length (an int) follows it, so the
    // read position has to advance past the run count before the extension length is read.
    int fieldOffset = index + SSTRecord.STRING_MINIMAL_OVERHEAD;

    runCount = 0;
    if ( richText )
    {
        runCount = LittleEndian.getShort( data, fieldOffset );
        fieldOffset += LittleEndianConsts.SHORT_SIZE;
    }

    // Always reset, so a value from a previously parsed string cannot leak into this one.
    extensionLength = 0;
    if ( extendedText )
    {
        extensionLength = LittleEndian.getInt( data, fieldOffset );
    }
}
/**
* Reads a string or the first part of a string.
*
* @param characters the number of characters to write.
*
* @return the number of bytes written.
*/
private int processString( final byte[] data, final int dataIndex, final int characters )
{
    // The buffer is sized for the string as it is stored (minimal header plus character data),
    // not for the number of bytes the string occupies in the source record.
    final int payloadSize = calculateByteCount( characters );
    final byte[] buffer = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD + payloadSize];

    // character count first, then the option flags copied from the source record
    LittleEndian.putUShort( buffer, 0, characters );
    buffer[LittleEndianConsts.SHORT_SIZE] = data[dataIndex + LittleEndianConsts.SHORT_SIZE];

    // the character data starts after the (possibly longer) header of the source string
    arraycopy( data, dataIndex + stringHeaderOverhead(), buffer,
            SSTRecord.STRING_MINIMAL_OVERHEAD, payloadSize );

    UnicodeString string = new UnicodeString( UnicodeString.sid, (short) buffer.length, buffer );

    if ( !isStringFinished() )
    {
        // more characters are expected in a continuation; keep what we have so far
        unfinishedString = string.getString();
    }
    else
    {
        addToStringTable( strings, new Integer( strings.size() ), string );
    }
    return payloadSize;
}
private boolean isStringFinished()
{
    // the string is complete when no characters are outstanding for a continuation
    final int outstandingChars = getContinuationExpectedChars();
    return outstandingChars == 0;
}
/**
* Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
* we end up getting duplicate strings. To get around this I'm doing do things: 1. Converting rich
* we end up getting duplicate strings. To get around this I'm doing two things: 1. Converting rich
* text to normal text and 2. If there's a duplicate I'm adding a space onto the end. Sneaky perhaps
* but it gets the job done until we can handle this a little better.
*/
static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )
{
if (string.isRichText())
string.setOptionFlags( (byte)(string.getOptionFlags() & (~8) ) );
if ( string.isRichText() )
string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
if ( string.isExtendedText() )
string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~4 ) ) );
boolean added = false;
while (added == false)
while ( added == false )
{
try
{
strings.put( integer, string );
added = true;
}
catch( Exception ignore )
catch ( Exception ignore )
{
string.setString( string.getString() + " " );
}
}
}
}
private int calculateCharCount( final int byte_count )
{
return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE
: LittleEndianConsts.BYTE_SIZE );
return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE );
}
/**
@ -219,81 +296,129 @@ class SSTDeserializer
*
* @param record the Continue record's byte data
*/
public void processContinueRecord( final byte[] record )
{
if ( getExpectedChars() == 0 )
if ( isStringFinished() )
{
unfinishedString = "";
totalLengthBytes = 0;
stringDataOffset = 0;
wideChar = false;
initVars();
manufactureStrings( record, 0, (short) record.length );
}
else
{
int data_length = record.length - LittleEndianConsts.BYTE_SIZE;
// reset the wide bit because that can change across a continuation. the fact that it's
// actually rich text doesn't change across continuations even though the rich text
// may no longer be set in the "new" option flag. confusing huh?
wideChar = ( record[0] & 1 ) == 1;
if ( calculateByteCount( getExpectedChars() ) > data_length )
if ( stringSpansContinuation( record.length - LittleEndianConsts.BYTE_SIZE ) )
{
// create artificial data to create a UnicodeString
byte[] input =
new byte[record.length + LittleEndianConsts.SHORT_SIZE];
short size = (short) ( ( ( record[0] & 1 ) == 1 )
? ( data_length / LittleEndianConsts.SHORT_SIZE )
: ( data_length / LittleEndianConsts.BYTE_SIZE ) );
LittleEndian.putShort( input, (byte) 0, size );
System.arraycopy( record, 0, input, LittleEndianConsts.SHORT_SIZE, record.length );
UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) input.length, input );
unfinishedString = unfinishedString + ucs.getString();
setExpectedChars( getExpectedChars() - size );
processEntireContinuation( record );
}
else
{
setupStringParameters( record, -LittleEndianConsts.SHORT_SIZE,
getExpectedChars() );
byte[] str_data = new byte[totalLengthBytes];
int length = SSTRecord.STRING_MINIMAL_OVERHEAD
+ ( calculateByteCount( getExpectedChars() ) );
byte[] bstring = new byte[length];
// Copy data from the record into the string
// buffer. Copy skips the length of a short in the
// string buffer, to leave room for the string length.
System.arraycopy( record, 0, str_data,
LittleEndianConsts.SHORT_SIZE,
str_data.length
- LittleEndianConsts.SHORT_SIZE );
// write the string length
LittleEndian.putShort( bstring, 0,
(short) getExpectedChars() );
// write the options flag
bstring[LittleEndianConsts.SHORT_SIZE] =
str_data[LittleEndianConsts.SHORT_SIZE];
// copy the bytes/words making up the string; skipping
// past all the overhead of the str_data array
System.arraycopy( str_data, stringDataOffset, bstring,
SSTRecord.STRING_MINIMAL_OVERHEAD,
bstring.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
// use special constructor to create the final string
UnicodeString string =
new UnicodeString( UnicodeString.sid,
(short) bstring.length, bstring,
unfinishedString );
Integer integer = new Integer( strings.size() );
// field_3_strings.put( integer, string );
addToStringTable( strings, integer, string );
manufactureStrings( record, totalLengthBytes - LittleEndianConsts.SHORT_SIZE, (short) record.length );
readStringRemainder( record );
}
}
}
/**
* Reads the remainder string and any subsequent strings from the continuation record.
*
* @param record The entire continuation record data.
*/
private void readStringRemainder( final byte[] record )
{
    final int remainderChars = getContinuationExpectedChars();
    final int remainderBytes = calculateByteCount( remainderChars );

    // Build an artificial string record: character count, option byte, then the character
    // data that follows the one-byte option flag at the start of the continuation.
    final byte[] tail = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD + remainderBytes];
    LittleEndian.putShort( tail, 0, (short) remainderChars );
    tail[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText );
    arraycopy( record, LittleEndianConsts.BYTE_SIZE, tail,
            SSTRecord.STRING_MINIMAL_OVERHEAD, remainderBytes );

    // the four-argument constructor is also handed the text collected so far
    UnicodeString string = new UnicodeString( UnicodeString.sid, (short) tail.length, tail,
            unfinishedString );
    addToStringTable( strings, new Integer( strings.size() ), string );

    // anything after the completed string is handed back to the regular string reader
    final int newOffset = offsetForContinuedRecord( remainderBytes );
    manufactureStrings( record, newOffset, (short) ( record.length - newOffset ) );
}
/**
* Calculates the size of the string in bytes based on the character width
*/
private int stringSizeInBytes()
{
    // byte size of the current string's characters, as computed by calculateByteCount
    final int sizeInBytes = calculateByteCount( charCount );
    return sizeInBytes;
}
/**
* Calculates the size of the string in bytes. This figure includes all the
* overhead for the string.
*/
private int totalStringSize()
{
    // each formatting run is counted as INT_SIZE bytes
    final int formattingRunBytes = LittleEndianConsts.INT_SIZE * runCount;
    return stringSizeInBytes() + stringHeaderOverhead() + formattingRunBytes + extensionLength;
}
private int stringHeaderOverhead()
{
    // minimal header, plus a short when the string is rich text, plus an int when it is
    // extended text
    int overhead = SSTRecord.STRING_MINIMAL_OVERHEAD;
    if ( richText )
    {
        overhead += LittleEndianConsts.SHORT_SIZE;
    }
    if ( extendedText )
    {
        overhead += LittleEndianConsts.INT_SIZE;
    }
    return overhead;
}
private int offsetForContinuedRecord( int stringRemainderSizeInBytes )
{
    // formatting runs (INT_SIZE bytes each) and the extension data are added on top of the
    // remaining character data and one extra byte
    final int trailingBytes = runCount * LittleEndianConsts.INT_SIZE + extensionLength;
    return LittleEndianConsts.BYTE_SIZE + stringRemainderSizeInBytes + trailingBytes;
}
private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast )
{
    // bit values: 1 = wide characters, 4 = far east (extended) text, 8 = rich text
    int flags = 0;
    if ( wideChar )
    {
        flags |= 1;
    }
    if ( farEast )
    {
        flags |= 4;
    }
    if ( richText )
    {
        flags |= 8;
    }
    return (byte) flags;
}
/**
* If the continued record is so long it spans into the next continue then
* simply suck the remaining string data into the existing <code>unfinishedString</code>.
*
* @param record The data from the continuation record.
*/
private void processEntireContinuation( final byte[] record )
{
    // everything after the leading option byte is character data
    final int payloadBytes = record.length - LittleEndianConsts.BYTE_SIZE;
    final int payloadChars = calculateCharCount( payloadBytes );

    // fake a string record by putting a character count in front of the continuation data
    final byte[] fakeString = new byte[LittleEndianConsts.SHORT_SIZE + record.length];
    LittleEndian.putShort( fakeString, (byte) 0, (short) payloadChars );
    arraycopy( record, 0, fakeString, LittleEndianConsts.SHORT_SIZE, record.length );

    UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) fakeString.length, fakeString );

    // append this segment and note how many characters are still to come
    unfinishedString = unfinishedString + ucs.getString();
    setContinuationExpectedChars( getContinuationExpectedChars() - payloadChars );
}
private boolean stringSpansContinuation( int continuationSizeInBytes )
{
    // true when the characters still expected need more bytes than this continuation offers
    final int bytesStillExpected = calculateByteCount( getContinuationExpectedChars() );
    return continuationSizeInBytes < bytesStillExpected;
}
/**
@ -301,12 +426,12 @@ class SSTDeserializer
* sub-record in a subsequent continuation record
*/
int getExpectedChars()
int getContinuationExpectedChars()
{
return continuationExpectedChars;
}
private void setExpectedChars( final int count )
private void setContinuationExpectedChars( final int count )
{
continuationExpectedChars = count;
}
@ -317,37 +442,116 @@ class SSTDeserializer
}
/**
* Copies an array from the specified source array, beginning at the
* specified position, to the specified position of the destination array.
* A subsequence of array components are copied from the source
* array referenced by <code>src</code> to the destination array
* referenced by <code>dst</code>. The number of components copied is
* equal to the <code>length</code> argument. The components at
* positions <code>srcOffset</code> through
* <code>srcOffset+length-1</code> in the source array are copied into
* positions <code>dstOffset</code> through
* <code>dstOffset+length-1</code>, respectively, of the destination
* array.
* <p>
* If the <code>src</code> and <code>dst</code> arguments refer to the
* same array object, then the copying is performed as if the
* components at positions <code>srcOffset</code> through
* <code>srcOffset+length-1</code> were first copied to a temporary
* array with <code>length</code> components and then the contents of
* the temporary array were copied into positions
* <code>dstOffset</code> through <code>dstOffset+length-1</code> of the
* destination array.
* <p>
* If <code>dst</code> is <code>null</code>, then a
* <code>NullPointerException</code> is thrown.
* <p>
* If <code>src</code> is <code>null</code>, then a
* <code>NullPointerException</code> is thrown and the destination
* array is not modified.
* <p>
* Otherwise, if any of the following is true, an
* <code>ArrayStoreException</code> is thrown and the destination is
* not modified:
* <ul>
* <li>The <code>src</code> argument refers to an object that is not an
* array.
* <li>The <code>dst</code> argument refers to an object that is not an
* array.
* <li>The <code>src</code> argument and <code>dst</code> argument refer to
* arrays whose component types are different primitive types.
* <li>The <code>src</code> argument refers to an array with a primitive
* component type and the <code>dst</code> argument refers to an array
* with a reference component type.
* <li>The <code>src</code> argument refers to an array with a reference
* component type and the <code>dst</code> argument refers to an array
* with a primitive component type.
* </ul>
* <p>
* Otherwise, if any of the following is true, an
* <code>IndexOutOfBoundsException</code> is
* thrown and the destination is not modified:
* <ul>
* <li>The <code>srcOffset</code> argument is negative.
* <li>The <code>dstOffset</code> argument is negative.
* <li>The <code>length</code> argument is negative.
* <li><code>srcOffset+length</code> is greater than
* <code>src.length</code>, the length of the source array.
* <li><code>dstOffset+length</code> is greater than
* <code>dst.length</code>, the length of the destination array.
* </ul>
* <p>
* Otherwise, if any actual component of the source array from
* position <code>srcOffset</code> through
* <code>srcOffset+length-1</code> cannot be converted to the component
* type of the destination array by assignment conversion, an
* <code>ArrayStoreException</code> is thrown. In this case, let
* <b><i>k</i></b> be the smallest nonnegative integer less than
* length such that <code>src[srcOffset+</code><i>k</i><code>]</code>
* cannot be converted to the component type of the destination
* array; when the exception is thrown, source array components from
* positions <code>srcOffset</code> through
* <code>srcOffset+</code><i>k</i><code>-1</code>
* will already have been copied to destination array positions
* <code>dstOffset</code> through
* <code>dstOffset+</code><i>k</I><code>-1</code> and no other
* positions of the destination array will have been modified.
* (Because of the restrictions already itemized, this
* paragraph effectively applies only to the situation where both
* arrays have component types that are reference types.)
*
* @param src the source array.
* @param src_position start position in the source array.
* @param dst the destination array.
* @param dst_position start position in the destination data.
* @param length the number of array elements to be copied.
* @exception IndexOutOfBoundsException if copying would cause
* access of data outside array bounds.
* @exception ArrayStoreException if an element in the <code>src</code>
* array could not be stored into the <code>dest</code> array
* because of a type mismatch.
* @exception NullPointerException if either <code>src</code> or
* <code>dst</code> is <code>null</code>.
*/
private void arraycopy( byte[] src, int src_position, byte[] dst, int dst_position, int length )
{
    // plain pass-through to the JDK array copy
    System.arraycopy( src, src_position, dst, dst_position, length );
}
/**
* @return the unfinished string
*/
String getUnfinishedString()
{
return unfinishedString;
}
/**
* @return the total length of the current string
*/
int getTotalLength()
{
return totalLengthBytes;
}
/**
* @return offset into current string data
*/
int getStringDataOffset()
{
return stringDataOffset;
}
/**
* @return true if current string uses wide characters
*/
boolean isWideChar()
{
return wideChar;

View File

@ -478,7 +478,7 @@ public class SSTRecord
field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
field_3_strings = new BinaryTree();
deserializer = new SSTDeserializer(field_3_strings);
deserializer.manufactureStrings( data, 8 + offset, size );
deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
}

View File

@ -79,6 +79,7 @@ public class UnicodeString
private byte field_2_optionflags; // = 0;
private String field_3_string; // = null;
private final int RICH_TEXT_BIT = 8;
private final int EXT_BIT = 4;
public UnicodeString()
{
@ -364,4 +365,9 @@ public class UnicodeString
return rval;
}
public boolean isExtendedText()
{
    // mask out everything except the extended-text bit of the option flags
    final int extendedBit = getOptionFlags() & EXT_BIT;
    return extendedBit != 0;
}
}

View File

@ -81,6 +81,7 @@ public class HexDump
* @param stream the OutputStream to which the data is to be
* written
* @param index initial index into the byte array
* @param length number of characters to output
*
* @exception IOException is thrown if anything goes wrong writing
* the data to stream
@ -89,11 +90,10 @@ public class HexDump
* @exception IllegalArgumentException if the output stream is
* null
*/
public synchronized static void dump(final byte [] data, final long offset,
final OutputStream stream, final int index)
throws IOException, ArrayIndexOutOfBoundsException,
IllegalArgumentException
final OutputStream stream, final int index, final int length)
throws IOException, ArrayIndexOutOfBoundsException,
IllegalArgumentException
{
if ((index < 0) || (index >= data.length))
{
@ -108,9 +108,11 @@ public class HexDump
long display_offset = offset + index;
StringBuffer buffer = new StringBuffer(74);
for (int j = index; j < data.length; j += 16)
int data_length = Math.min(data.length,index+length);
for (int j = index; j < data_length; j += 16)
{
int chars_read = data.length - j;
int chars_read = data_length - j;
if (chars_read > 16)
{
@ -146,6 +148,32 @@ public class HexDump
buffer.setLength(0);
display_offset += chars_read;
}
}
/**
* dump an array of bytes to an OutputStream
*
* @param data the byte array to be dumped
* @param offset its offset, whatever that might mean
* @param stream the OutputStream to which the data is to be
* written
* @param index initial index into the byte array
*
* @exception IOException is thrown if anything goes wrong writing
* the data to stream
* @exception ArrayIndexOutOfBoundsException if the index is
* outside the data array's bounds
* @exception IllegalArgumentException if the output stream is
* null
*/
public synchronized static void dump(final byte [] data, final long offset,
                                     final OutputStream stream, final int index)
    throws IOException, ArrayIndexOutOfBoundsException,
           IllegalArgumentException
{
    // dump everything from index up to the end of the array
    final int remaining = data.length - index;
    dump(data, offset, stream, index, remaining);
}
public static final String EOL =

View File

@ -236,13 +236,27 @@ public class LittleEndian
*
* @exception ArrayIndexOutOfBoundsException may be thrown
*/
public static void putShort(final byte[] data, final int offset, final short value)
{
    // delegates to putNumber with a width of SHORT_SIZE bytes
    putNumber(data, offset, value, SHORT_SIZE);
}
/**
* put an unsigned short value into a byte array
*
* @param data the byte array
* @param offset a starting offset into the byte array
* @param value the short (16-bit) value
*
* @exception ArrayIndexOutOfBoundsException may be thrown
*/
public static void putUShort(final byte[] data, final int offset, final int value)
{
    // the value arrives as an int; putNumber is asked to write SHORT_SIZE bytes of it
    putNumber(data, offset, value, SHORT_SIZE);
}
/**
* put a array of shorts into a byte array
*

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
14 00 # String length 0x14=20
01 # Option flag, 16bit
# String: At a dinner party or
41 00 74 00 20 00 61 00 20 00
64 00 69 00 6E 00 6E 00 65 00
72 00 20 00 70 00 61 00 72 00
74 00 79 00 20 00 6F 00 72 00
# Continuation record (new string on the boundary)
11 00 # String length 0x11=17
00 # Option flag, 8bit
# String: At a dinner party
41 74 20 61 20
64 69 6E 6E 65
72 20 70 61 72
74 79

View File

@ -0,0 +1,21 @@
1D 00 # String length 0x1d=29
09 # Option flag, rich text + 16bit
02 00 # Formatting runs
# String: At a dinner party or
41 00 74 00 20 00 61 00 20 00
64 00 69 00 6E 00 6E 00 65 00
72 00 20 00 70 00 61 00 72 00
74 00 79 00 20 00 6F 00 72 00
# Continuation record
00 # option flag
# string:at at at
41 74 20
41 74 20
41 74 20
00 00 # Formatting run 1, first formatted char at 0
00 00 # Formatting run 1, Index to font record
02 00 # Formatting run 2, first formatted char at 2
00 00 # Formatting run 2, Index to font record

View File

@ -0,0 +1,7 @@
14 00 # String length 0x14=20
01 # Option flag, 16bit
# String: At a dinner party or
41 00 74 00 20 00 61 00 20 00
64 00 69 00 6E 00 6E 00 65 00
72 00 20 00 70 00 61 00 72 00
74 00 79 00 20 00 6F 00 72 00

View File

@ -0,0 +1,9 @@
# Continuation record
22 00 # String length 0x22=34
00 # Option flag, 8bit
# String: At a dinner party
41 74 20 61 20
64 69 6E 6E 65
72 20 70 61 72
74 79

View File

@ -0,0 +1,7 @@
# Continuation record
00 # option flag
# String: At a dinner party
41 74 20 61 20
64 69 6E 6E 65
72 20 70 61 72
74 79

View File

@ -55,19 +55,20 @@
package org.apache.poi.hssf.record;
import junit.framework.TestCase;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.util.BinaryTree;
import org.apache.poi.util.HexRead;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
* @author Marc Johnson (mjohnson at apache dot org)
* @author Glen Stampoultzis (glens at apache.org)
*/
public class TestSSTRecord
@ -98,14 +99,14 @@ public class TestSSTRecord
public void testProcessContinueRecord()
throws IOException
{
byte[] testdata = readTestData( "BigSSTRecord" );
byte[] testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord" );
byte[] input = new byte[testdata.length - 4];
System.arraycopy( testdata, 4, input, 0, input.length );
SSTRecord record =
new SSTRecord( LittleEndian.getShort( testdata, 0 ),
LittleEndian.getShort( testdata, 2 ), input );
byte[] continueRecord = readTestData( "BigSSTRecordCR" );
byte[] continueRecord = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecordCR" );
input = new byte[continueRecord.length - 4];
System.arraycopy( continueRecord, 4, input, 0, input.length );
@ -141,42 +142,42 @@ public class TestSSTRecord
assertEquals( record, testRecord );
// testing based on new bug report
testdata = readTestData( "BigSSTRecord2" );
testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2" );
input = new byte[testdata.length - 4];
System.arraycopy( testdata, 4, input, 0, input.length );
record = new SSTRecord( LittleEndian.getShort( testdata, 0 ),
LittleEndian.getShort( testdata, 2 ), input );
byte[] continueRecord1 = readTestData( "BigSSTRecord2CR1" );
byte[] continueRecord1 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR1" );
input = new byte[continueRecord1.length - 4];
System.arraycopy( continueRecord1, 4, input, 0, input.length );
record.processContinueRecord( input );
byte[] continueRecord2 = readTestData( "BigSSTRecord2CR2" );
byte[] continueRecord2 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR2" );
input = new byte[continueRecord2.length - 4];
System.arraycopy( continueRecord2, 4, input, 0, input.length );
record.processContinueRecord( input );
byte[] continueRecord3 = readTestData( "BigSSTRecord2CR3" );
byte[] continueRecord3 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR3" );
input = new byte[continueRecord3.length - 4];
System.arraycopy( continueRecord3, 4, input, 0, input.length );
record.processContinueRecord( input );
byte[] continueRecord4 = readTestData( "BigSSTRecord2CR4" );
byte[] continueRecord4 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR4" );
input = new byte[continueRecord4.length - 4];
System.arraycopy( continueRecord4, 4, input, 0, input.length );
record.processContinueRecord( input );
byte[] continueRecord5 = readTestData( "BigSSTRecord2CR5" );
byte[] continueRecord5 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR5" );
input = new byte[continueRecord5.length - 4];
System.arraycopy( continueRecord5, 4, input, 0, input.length );
record.processContinueRecord( input );
byte[] continueRecord6 = readTestData( "BigSSTRecord2CR6" );
byte[] continueRecord6 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR6" );
input = new byte[continueRecord6.length - 4];
System.arraycopy( continueRecord6, 4, input, 0, input.length );
record.processContinueRecord( input );
byte[] continueRecord7 = readTestData( "BigSSTRecord2CR7" );
byte[] continueRecord7 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR7" );
input = new byte[continueRecord7.length - 4];
System.arraycopy( continueRecord7, 4, input, 0, input.length );
@ -208,6 +209,7 @@ public class TestSSTRecord
}
assertEquals( offset, ser_output.length );
assertEquals( record, testRecord );
assertEquals( record.countStrings(), testRecord.countStrings() );
}
/**
@ -333,7 +335,6 @@ public class TestSSTRecord
*
* @exception IOException
*/
public void testSSTRecordBug()
throws IOException
{
@ -366,7 +367,6 @@ public class TestSSTRecord
/**
* test simple addString
*/
public void testSimpleAddString()
{
SSTRecord record = new SSTRecord();
@ -420,7 +420,7 @@ public class TestSSTRecord
public void testReaderConstructor()
throws IOException
{
byte[] testdata = readTestData( "BigSSTRecord" );
byte[] testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord" );
byte[] input = new byte[testdata.length - 4];
System.arraycopy( testdata, 4, input, 0, input.length );
@ -431,11 +431,11 @@ public class TestSSTRecord
assertEquals( 1464, record.getNumStrings() );
assertEquals( 688, record.getNumUniqueStrings() );
assertEquals( 492, record.countStrings() );
assertEquals( 1, record.getDeserializer().getExpectedChars() );
assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() );
assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai",
record.getDeserializer().getUnfinishedString() );
assertEquals( 52, record.getDeserializer().getTotalLength() );
assertEquals( 3, record.getDeserializer().getStringDataOffset() );
// assertEquals( 52, record.getDeserializer().getTotalLength() );
// assertEquals( 3, record.getDeserializer().getStringDataOffset() );
assertTrue( !record.getDeserializer().isWideChar() );
}
@ -450,10 +450,10 @@ public class TestSSTRecord
assertEquals( 0, record.getNumStrings() );
assertEquals( 0, record.getNumUniqueStrings() );
assertEquals( 0, record.countStrings() );
assertEquals( 0, record.getDeserializer().getExpectedChars() );
assertEquals( 0, record.getDeserializer().getContinuationExpectedChars() );
assertEquals( "", record.getDeserializer().getUnfinishedString() );
assertEquals( 0, record.getDeserializer().getTotalLength() );
assertEquals( 0, record.getDeserializer().getStringDataOffset() );
// assertEquals( 0, record.getDeserializer().getTotalLength() );
// assertEquals( 0, record.getDeserializer().getStringDataOffset() );
assertTrue( !record.getDeserializer().isWideChar() );
byte[] output = record.serialize();
byte[] expected =
@ -482,99 +482,6 @@ public class TestSSTRecord
junit.textui.TestRunner.run( TestSSTRecord.class );
}
/**
 * Reads a hex-dump test file and converts it to the bytes it describes.
 * <p>
 * Every pair of hexadecimal digits (0-9, A-F, a-f) found in the file
 * becomes one output byte, high nibble first. All other characters are
 * ignored. No comment syntax is recognised, so any hex digit anywhere in
 * the file contributes to the output. A trailing unpaired digit is
 * discarded.
 *
 * @param filename name of the file, resolved against _test_file_path
 * @return the decoded bytes, possibly empty
 * @exception IOException if the file cannot be opened or read
 */
private byte[] readTestData( String filename )
        throws IOException
{
    File file = new File( _test_file_path
                          + File.separator
                          + filename );
    FileInputStream stream = new FileInputStream( file );
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();

    try
    {
        int characterCount = 0;
        byte b = (byte) 0;
        int count;

        while ( ( count = stream.read() ) != -1 )
        {
            // -1 for anything that is not a hex digit
            int nibble = Character.digit( (char) count, 16 );

            if ( nibble < 0 )
            {
                continue;
            }
            b <<= 4;
            b += (byte) nibble;
            characterCount++;
            if ( characterCount == 2 )
            {
                bytes.write( b );
                characterCount = 0;
                b = (byte) 0;
            }
        }
    }
    finally
    {
        // release the file handle even when read() fails
        stream.close();
    }
    return bytes.toByteArray();
}
/**
* Tests that workbooks with rich text that duplicates a non rich text cell can be read and written.
*/
@ -582,38 +489,86 @@ public class TestSSTRecord
throws Exception
{
File file = new File( _test_file_path + File.separator + "duprich1.xls" );
InputStream stream = new FileInputStream(file);
HSSFWorkbook wb = new HSSFWorkbook(stream);
InputStream stream = new FileInputStream( file );
HSSFWorkbook wb = new HSSFWorkbook( stream );
stream.close();
HSSFSheet sheet = wb.getSheetAt(1);
assertEquals("01/05 (Wed) ", sheet.getRow(0).getCell((short)8).getStringCellValue());
assertEquals("01/05 (Wed)", sheet.getRow(1).getCell((short)8).getStringCellValue());
HSSFSheet sheet = wb.getSheetAt( 1 );
assertEquals( "01/05 (Wed) ", sheet.getRow( 0 ).getCell( (short) 8 ).getStringCellValue() );
assertEquals( "01/05 (Wed)", sheet.getRow( 1 ).getCell( (short) 8 ).getStringCellValue() );
file = File.createTempFile("testout", "xls");
FileOutputStream outStream = new FileOutputStream(file);
wb.write(outStream);
file = File.createTempFile( "testout", "xls" );
FileOutputStream outStream = new FileOutputStream( file );
wb.write( outStream );
outStream.close();
file.delete();
// test the second file.
file = new File( _test_file_path + File.separator + "duprich2.xls" );
stream = new FileInputStream(file);
wb = new HSSFWorkbook(stream);
stream = new FileInputStream( file );
wb = new HSSFWorkbook( stream );
stream.close();
sheet = wb.getSheetAt(0);
sheet = wb.getSheetAt( 0 );
int row = 0;
assertEquals("Testing ", sheet.getRow(row++).getCell((short)0).getStringCellValue());
assertEquals("rich", sheet.getRow(row++).getCell((short)0).getStringCellValue());
assertEquals("text", sheet.getRow(row++).getCell((short)0).getStringCellValue());
assertEquals("strings", sheet.getRow(row++).getCell((short)0).getStringCellValue());
assertEquals("Testing ", sheet.getRow(row++).getCell((short)0).getStringCellValue());
assertEquals("Testing", sheet.getRow(row++).getCell((short)0).getStringCellValue());
assertEquals( "Testing ", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
assertEquals( "rich", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
assertEquals( "text", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
assertEquals( "strings", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
assertEquals( "Testing ", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
assertEquals( "Testing", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
// file = new File("/tryme.xls");
file = File.createTempFile("testout", ".xls");
outStream = new FileOutputStream(file);
wb.write(outStream);
file = File.createTempFile( "testout", ".xls" );
outStream = new FileOutputStream( file );
wb.write( outStream );
outStream.close();
file.delete();
}
/**
 * Loads richtextdata.txt, passes the buffer to the deserializer with the
 * arguments 0 and 45 (presumably offset and record size - confirm against
 * SSTDeserializer), then supplies everything from byte 45 onwards as a
 * continuation record and checks the string stored under key 0.
 */
public void testSpanRichTextToPlainText()
        throws Exception
{
    final int splitAt = 45;
    String dataFile = _test_file_path + File.separator + "richtextdata.txt";
    byte[] allBytes = HexRead.readTestData( dataFile );

    BinaryTree stringTable = new BinaryTree();
    SSTDeserializer sstDeserializer = new SSTDeserializer( stringTable );
    sstDeserializer.manufactureStrings( allBytes, 0, (short) splitAt );

    // everything after the split point is handed over as the continuation
    int remaining = allBytes.length - splitAt;
    byte[] continuation = new byte[remaining];
    System.arraycopy( allBytes, splitAt, continuation, 0, remaining );
    sstDeserializer.processContinueRecord( continuation );

    Object first = stringTable.get( new Integer( 0 ) );
    assertEquals( "At a dinner party orAt At At ", first + "" );
}
/**
 * Loads evencontinuation.txt, passes the buffer to the deserializer with
 * the arguments 0 and 43 (presumably offset and record size - confirm
 * against SSTDeserializer), then supplies everything from byte 43 onwards
 * as a continuation record and checks the strings stored under keys 0
 * and 1.
 */
public void testContinuationWithNoOverlap()
        throws Exception
{
    final int splitAt = 43;
    String dataFile = _test_file_path + File.separator + "evencontinuation.txt";
    byte[] allBytes = HexRead.readTestData( dataFile );

    BinaryTree stringTable = new BinaryTree();
    SSTDeserializer sstDeserializer = new SSTDeserializer( stringTable );
    sstDeserializer.manufactureStrings( allBytes, 0, (short) splitAt );

    // everything after the split point is handed over as the continuation
    int remaining = allBytes.length - splitAt;
    byte[] continuation = new byte[remaining];
    System.arraycopy( allBytes, splitAt, continuation, 0, remaining );
    sstDeserializer.processContinueRecord( continuation );

    Object first = stringTable.get( new Integer( 0 ) );
    assertEquals( "At a dinner party or", first + "" );
    Object second = stringTable.get( new Integer( 1 ) );
    assertEquals( "At a dinner party", second + "" );
}
/**
 * Loads stringacross2continuations.txt into the deserializer (arguments 0
 * and 43, presumably offset and record size - confirm against
 * SSTDeserializer), then feeds the CR1 and CR2 files in order as
 * continuation records and checks the strings stored under keys 0 and 1.
 */
public void testStringAcross2Continuations()
        throws Exception
{
    String basePath = _test_file_path + File.separator;
    BinaryTree stringTable = new BinaryTree();
    SSTDeserializer sstDeserializer = new SSTDeserializer( stringTable );

    byte[] mainRecord = HexRead.readTestData( basePath + "stringacross2continuations.txt" );
    sstDeserializer.manufactureStrings( mainRecord, 0, (short) 43 );

    byte[] firstContinuation = HexRead.readTestData( basePath + "stringacross2continuationsCR1.txt" );
    sstDeserializer.processContinueRecord( firstContinuation );

    byte[] secondContinuation = HexRead.readTestData( basePath + "stringacross2continuationsCR2.txt" );
    sstDeserializer.processContinueRecord( secondContinuation );

    Object first = stringTable.get( new Integer( 0 ) );
    assertEquals( "At a dinner party or", first + "" );
    Object second = stringTable.get( new Integer( 1 ) );
    assertEquals( "At a dinner partyAt a dinner party", second + "" );
}
}