SST fixed!!! Yay... Will reliably read in spreadsheets that have rich text or extended text. Code is a bit cleaner now but could still use more improvement. If I have the energy I'll look into it.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/branches/REL_1_5_BRANCH@352663 13f79535-47bb-0310-9956-ffa450edef68
2002-06-09 12:33:26 +00:00 · 2002-06-09 12:33:26 +00:00 · d7672fa259
commit d7672fa259
parent 8a4d120c34
13 changed files with 1113 additions and 846 deletions
--- a/module.xml
+++ b/module.xml
@ -68,7 +68,7 @@
     <version major="1"
             minor="5"   
-             fix  ="0"   
+	     fix  ="1"   
 	     tag="dev"/>
    <package>org.apache.poi</package>
--- a/src/java/org/apache/poi/hssf/record/SSTDeserializer.java
+++ b/src/java/org/apache/poi/hssf/record/SSTDeserializer.java
@ -1,12 +1,68 @@
 /* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache POI" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache POI", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
 package org.apache.poi.hssf.record;
 import org.apache.poi.util.BinaryTree;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.LittleEndianConsts;
 import org.apache.poi.util.BinaryTree;
 import org.apache.poi.util.HexDump;
 import java.io.IOException;
 /**
 * Handles the task of deserializing a SST string.  The two main entry points are
 *
 * @author Glen Stampoultzis (glens at apache.org)
 */
 class SSTDeserializer
 {
@ -15,22 +71,35 @@ class SSTDeserializer
    private int continuationExpectedChars;
    /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
    private String unfinishedString;
    /** this is the total length of the current string being handled */
    private int totalLengthBytes;
    /** this is the offset into a string field of the actual string data */
    private int stringDataOffset;
    /** this is true if the string uses wide characters */
    private boolean wideChar;
    /** this is true if the string is a rich text string */
    private boolean richText;
    /** this is true if the string is a far east string or some other wierd string */
    private boolean extendedText;
    /** Number of formatting runs in this rich text field */
    private short runCount;
    /** Number of characters in current string */
    private int charCount;
    private int extensionLength;
    public SSTDeserializer( BinaryTree strings )
    {
        this.strings = strings;
-        setExpectedChars( 0 );
+        initVars();
    }
    private void initVars()
    {
        runCount = 0;
        continuationExpectedChars = 0;
        unfinishedString = "";
-        totalLengthBytes = 0;
+//        bytesInCurrentSegment = 0;
-        stringDataOffset = 0;
+//        stringDataOffset = 0;
        wideChar = false;
        richText = false;
        extendedText = false;
    }
    /**
@ -38,14 +107,14 @@ class SSTDeserializer
     * strings may span across multiple continuations. Read the SST record
     * carefully before beginning to hack.
     */
-    public void manufactureStrings( final byte[] data, final int index,
+    public void manufactureStrings( final byte[] data, final int initialOffset, short dataSize )
                                     short size )
    {
-        int offset = index;
+        initVars();
-        while ( offset < size )
+        int offset = initialOffset;
        while ( ( offset - initialOffset ) < dataSize )
        {
-            int remaining = size - offset;
+            int remaining = dataSize - offset + initialOffset;
            if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) )
            {
@ -53,90 +122,38 @@ class SSTDeserializer
            }
            if ( remaining == LittleEndianConsts.SHORT_SIZE )
            {
-                setExpectedChars( LittleEndian.getUShort( data, offset ) );
+                setContinuationExpectedChars( LittleEndian.getUShort( data, offset ) );
                unfinishedString = "";
                break;
            }
-            short charCount = LittleEndian.getShort( data, offset );
+            charCount = LittleEndian.getUShort( data, offset );
-
+            readStringHeader( data, offset );
-            setupStringParameters( data, offset, charCount );
+            boolean stringContinuesOverContinuation = remaining < totalStringSize();
-            if ( remaining < totalLengthBytes )
+            if ( stringContinuesOverContinuation )
            {
-                setExpectedChars( calculateCharCount( totalLengthBytes - remaining ) );
+                int remainingBytes = ( initialOffset + dataSize ) - offset - stringHeaderOverhead();
-                charCount -= getExpectedChars();
+                setContinuationExpectedChars( charCount - calculateCharCount( remainingBytes ) );
-                totalLengthBytes = remaining;
+                charCount -= getContinuationExpectedChars();
            }
            else
            {
-                setExpectedChars( 0 );
+                setContinuationExpectedChars( 0 );
            }
            processString( data, offset, charCount );
-            offset += totalLengthBytes;
+            offset += totalStringSize();
-            if ( getExpectedChars() != 0 )
+            if ( getContinuationExpectedChars() != 0 )
            {
                break;
            }
        }
    }
-
+//    private void dump( final byte[] data, int offset, int length )
-    /**
+//    {
     * Detemines the option types for the string (ie, compressed or uncompressed unicode, rich text string or
     * plain string etc) and calculates the length and offset for the string.
     *
     * @param data
     * @param index
     * @param char_count
     */
    private void setupStringParameters( final byte[] data, final int index,
                                        final int char_count )
    {
        byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
        wideChar = ( optionFlag & 1 ) == 1;
        boolean farEast = ( optionFlag & 4 ) == 4;
        boolean richText = ( optionFlag & 8 ) == 8;
        totalLengthBytes = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
        stringDataOffset = SSTRecord.STRING_MINIMAL_OVERHEAD;
        if ( richText )
        {
            short run_count = LittleEndian.getShort( data, index + stringDataOffset );
            stringDataOffset += LittleEndianConsts.SHORT_SIZE;
            totalLengthBytes += LittleEndianConsts.SHORT_SIZE + ( LittleEndianConsts.INT_SIZE * run_count );
        }
        if ( farEast )
        {
            int extension_length = LittleEndian.getInt( data, index + stringDataOffset );
            stringDataOffset += LittleEndianConsts.INT_SIZE;
            totalLengthBytes += LittleEndianConsts.INT_SIZE + extension_length;
        }
    }
    private void processString( final byte[] data, final int index,
                                final short char_count )
    {
        byte[] stringDataBuffer = new byte[totalLengthBytes];
        int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
        byte[] bstring = new byte[length];
        System.arraycopy( data, index, stringDataBuffer, 0, stringDataBuffer.length );
        int offset = 0;
        LittleEndian.putShort( bstring, offset, char_count );
        offset += LittleEndianConsts.SHORT_SIZE;
        bstring[offset] = stringDataBuffer[offset];
 //        System.out.println( "offset = " + stringDataOffset );
 //        System.out.println( "length = " + (bstring.length - STRING_MINIMAL_OVERHEAD) );
 //        System.out.println( "src.length = " + str_data.length );
 //        try
 //        {
-//            System.out.println( "----------------------- DUMP -------------------------" );
+//            System.out.println( "------------------- SST DUMP -------------------------" );
-//            HexDump.dump( stringDataBuffer, (long)stringDataOffset, System.out, 1);
+//            HexDump.dump( (byte[]) data, offset, System.out, offset, length );
 //        }
 //        catch ( IOException e )
 //        {
@ -147,34 +164,95 @@ class SSTDeserializer
 //        catch ( IllegalArgumentException e )
 //        {
 //        }
-        System.arraycopy( stringDataBuffer, stringDataOffset, bstring,
+//    }
                SSTRecord.STRING_MINIMAL_OVERHEAD,
                bstring.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
        UnicodeString string = new UnicodeString( UnicodeString.sid,
                (short) bstring.length,
                bstring );
-        if ( getExpectedChars() != 0 )
+    /**
     * Detemines the option types for the string (ie, compressed or uncompressed unicode, rich text string or
     * plain string etc) and calculates the length and offset for the string.
     *
     */
    private void readStringHeader( final byte[] data, final int index )
    {
-            unfinishedString = string.getString();
+
        byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
        wideChar = ( optionFlag & 1 ) == 1;
        extendedText = ( optionFlag & 4 ) == 4;
        richText = ( optionFlag & 8 ) == 8;
        runCount = 0;
        if ( richText )
        {
            runCount = LittleEndian.getShort( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD );
        }
-        else
+        extensionLength = 0;
        if ( extendedText )
        {
            extensionLength = LittleEndian.getInt( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD );
        }
    }
    /**
     * Reads a string or the first part of a string.
     *
     * @param characters the number of characters to write.
     *
     * @return the number of bytes written.
     */
    private int processString( final byte[] data, final int dataIndex, final int characters )
    {
        // length is the length we store it as.  not the length that is read.
        int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( characters );
        byte[] unicodeStringBuffer = new byte[length];
        int offset = 0;
        // Set the length in characters
        LittleEndian.putUShort( unicodeStringBuffer, offset, characters );
        offset += LittleEndianConsts.SHORT_SIZE;
        // Set the option flags
        unicodeStringBuffer[offset] = data[dataIndex + offset];
        // Copy in the string data
        int bytesRead = unicodeStringBuffer.length - SSTRecord.STRING_MINIMAL_OVERHEAD;
        arraycopy( data, dataIndex + stringHeaderOverhead(), unicodeStringBuffer, SSTRecord.STRING_MINIMAL_OVERHEAD, bytesRead );
        // Create the unicode string
        UnicodeString string = new UnicodeString( UnicodeString.sid,
                (short) unicodeStringBuffer.length,
                unicodeStringBuffer );
        if ( isStringFinished() )
        {
            Integer integer = new Integer( strings.size() );
            addToStringTable( strings, integer, string );
        }
        else
        {
            unfinishedString = string.getString();
        }
        return bytesRead;
    }
    private boolean isStringFinished()
    {
        return getContinuationExpectedChars() == 0;
    }
    /**
     * Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
-     * we end up getting duplicate strings.  To get around this I'm doing do things: 1. Converting rich
+     * we end up getting duplicate strings.  To get around this I'm doing two things: 1. Converting rich
     * text to normal text and 2. If there's a duplicate I'm adding a space onto the end.  Sneaky perhaps
     * but it gets the job done until we can handle this a little better.
     */
    static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )
    {
        if ( string.isRichText() )
            string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
        if ( string.isExtendedText() )
            string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~4 ) ) );
        boolean added = false;
        while ( added == false )
@ -189,14 +267,13 @@ class SSTDeserializer
                string.setString( string.getString() + " " );
            }
        }
    }
    }
    private int calculateCharCount( final int byte_count )
    {
-        return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE
+        return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE );
                : LittleEndianConsts.BYTE_SIZE );
    }
    /**
@ -219,81 +296,129 @@ class SSTDeserializer
     *
     * @param record the Continue record's byte data
     */
    public void processContinueRecord( final byte[] record )
    {
-        if ( getExpectedChars() == 0 )
+        if ( isStringFinished() )
        {
-            unfinishedString = "";
+            initVars();
            totalLengthBytes = 0;
            stringDataOffset = 0;
            wideChar = false;
            manufactureStrings( record, 0, (short) record.length );
        }
        else
        {
-            int data_length = record.length - LittleEndianConsts.BYTE_SIZE;
+            // reset the wide bit because that can change across a continuation. the fact that it's
            // actually rich text doesn't change across continuations even though the rich text
            // may on longer be set in the "new" option flag.  confusing huh?
            wideChar = ( record[0] & 1 ) == 1;
-            if ( calculateByteCount( getExpectedChars() ) > data_length )
+            if ( stringSpansContinuation( record.length - LittleEndianConsts.BYTE_SIZE ) )
            {
-
+                processEntireContinuation( record );
                // create artificial data to create a UnicodeString
                byte[] input =
                        new byte[record.length + LittleEndianConsts.SHORT_SIZE];
                short size = (short) ( ( ( record[0] & 1 ) == 1 )
                        ? ( data_length / LittleEndianConsts.SHORT_SIZE )
                        : ( data_length / LittleEndianConsts.BYTE_SIZE ) );
                LittleEndian.putShort( input, (byte) 0, size );
                System.arraycopy( record, 0, input, LittleEndianConsts.SHORT_SIZE, record.length );
                UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) input.length, input );
                unfinishedString = unfinishedString + ucs.getString();
                setExpectedChars( getExpectedChars() - size );
            }
            else
            {
-                setupStringParameters( record, -LittleEndianConsts.SHORT_SIZE,
+                readStringRemainder( record );
-                        getExpectedChars() );
+            }
-                byte[] str_data = new byte[totalLengthBytes];
+        }
                int length = SSTRecord.STRING_MINIMAL_OVERHEAD
                        + ( calculateByteCount( getExpectedChars() ) );
                byte[] bstring = new byte[length];
-                // Copy data from the record into the string
+    }
-                // buffer. Copy skips the length of a short in the
+
-                // string buffer, to leave room for the string length.
+    /**
-                System.arraycopy( record, 0, str_data,
+     * Reads the remainder string and any subsequent strings from the continuation record.
-                        LittleEndianConsts.SHORT_SIZE,
+     *
-                        str_data.length
+     * @param record  The entire continuation record data.
-                        - LittleEndianConsts.SHORT_SIZE );
+     */
    private void readStringRemainder( final byte[] record )
    {
        int stringRemainderSizeInBytes = calculateByteCount( getContinuationExpectedChars() );
 //        stringDataOffset = LittleEndianConsts.BYTE_SIZE;
        byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD
                + calculateByteCount( getContinuationExpectedChars() )];
        // write the string length
-                LittleEndian.putShort( bstring, 0,
+        LittleEndian.putShort( unicodeStringData, 0, (short) getContinuationExpectedChars() );
                        (short) getExpectedChars() );
        // write the options flag
-                bstring[LittleEndianConsts.SHORT_SIZE] =
+        unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText );
                        str_data[LittleEndianConsts.SHORT_SIZE];
        // copy the bytes/words making up the string; skipping
        // past all the overhead of the str_data array
-                System.arraycopy( str_data, stringDataOffset, bstring,
+        arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData,
                SSTRecord.STRING_MINIMAL_OVERHEAD,
-                        bstring.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
+                unicodeStringData.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
        // use special constructor to create the final string
-                UnicodeString string =
+        UnicodeString string = new UnicodeString( UnicodeString.sid,
-                        new UnicodeString( UnicodeString.sid,
+                (short) unicodeStringData.length, unicodeStringData,
                                (short) bstring.length, bstring,
                unfinishedString );
        Integer integer = new Integer( strings.size() );
 //                field_3_strings.put( integer, string );
        addToStringTable( strings, integer, string );
-                manufactureStrings( record, totalLengthBytes - LittleEndianConsts.SHORT_SIZE, (short) record.length );
+
        int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes );
        manufactureStrings( record, newOffset, (short) ( record.length - newOffset ) );
    }
    /**
     * Calculates the size of the string in bytes based on the character width
     */
    private int stringSizeInBytes()
    {
        return calculateByteCount( charCount );
    }
    /**
     * Calculates the size of the string in byes.  This figure includes all the over
     * heads for the string.
     */
    private int totalStringSize()
    {
        return stringSizeInBytes()
                + stringHeaderOverhead()
                + LittleEndianConsts.INT_SIZE * runCount
                + extensionLength;
    }
    private int stringHeaderOverhead()
    {
        return SSTRecord.STRING_MINIMAL_OVERHEAD
                + ( richText ? LittleEndianConsts.SHORT_SIZE : 0 )
                + ( extendedText ? LittleEndianConsts.INT_SIZE : 0 );
    }
    private int offsetForContinuedRecord( int stringRemainderSizeInBytes )
    {
        return stringRemainderSizeInBytes + LittleEndianConsts.BYTE_SIZE
                + runCount * LittleEndianConsts.INT_SIZE + extensionLength;
    }
    private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast )
    {
        return (byte) ( ( wideChar ? 1 : 0 ) + ( farEast ? 4 : 0 ) + ( richText ? 8 : 0 ) );
    }
    /**
     * If the continued record is so long is spans into the next continue then
     * simply suck the remaining string data into the existing <code>unfinishedString</code>.
     *
     * @param record    The data from the continuation record.
     */
    private void processEntireContinuation( final byte[] record )
    {
        // create artificial data to create a UnicodeString
        int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE;
        byte[] unicodeStringData = new byte[record.length + LittleEndianConsts.SHORT_SIZE];
        LittleEndian.putShort( unicodeStringData, (byte) 0, (short) calculateCharCount( dataLengthInBytes ) );
        arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE, record.length );
        UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData );
        unfinishedString = unfinishedString + ucs.getString();
        setContinuationExpectedChars( getContinuationExpectedChars() - calculateCharCount( dataLengthInBytes ) );
    }
    private boolean stringSpansContinuation( int continuationSizeInBytes )
    {
        return calculateByteCount( getContinuationExpectedChars() ) > continuationSizeInBytes;
    }
    /**
@ -301,12 +426,12 @@ class SSTDeserializer
     *         sub-record in a subsequent continuation record
     */
-    int getExpectedChars()
+    int getContinuationExpectedChars()
    {
        return continuationExpectedChars;
    }
-    private void setExpectedChars( final int count )
+    private void setContinuationExpectedChars( final int count )
    {
        continuationExpectedChars = count;
    }
@ -317,37 +442,116 @@ class SSTDeserializer
    }
    /**
     * Copies an array from the specified source array, beginning at the
     * specified position, to the specified position of the destination array.
     * A subsequence of array components are copied from the source
     * array referenced by <code>src</code> to the destination array
     * referenced by <code>dst</code>. The number of components copied is
     * equal to the <code>length</code> argument. The components at
     * positions <code>srcOffset</code> through
     * <code>srcOffset+length-1</code> in the source array are copied into
     * positions <code>dstOffset</code> through
     * <code>dstOffset+length-1</code>, respectively, of the destination
     * array.
     * <p>
     * If the <code>src</code> and <code>dst</code> arguments refer to the
     * same array object, then the copying is performed as if the
     * components at positions <code>srcOffset</code> through
     * <code>srcOffset+length-1</code> were first copied to a temporary
     * array with <code>length</code> components and then the contents of
     * the temporary array were copied into positions
     * <code>dstOffset</code> through <code>dstOffset+length-1</code> of the
     * destination array.
     * <p>
     * If <code>dst</code> is <code>null</code>, then a
     * <code>NullPointerException</code> is thrown.
     * <p>
     * If <code>src</code> is <code>null</code>, then a
     * <code>NullPointerException</code> is thrown and the destination
     * array is not modified.
     * <p>
     * Otherwise, if any of the following is true, an
     * <code>ArrayStoreException</code> is thrown and the destination is
     * not modified:
     * <ul>
     * <li>The <code>src</code> argument refers to an object that is not an
     *     array.
     * <li>The <code>dst</code> argument refers to an object that is not an
     *     array.
     * <li>The <code>src</code> argument and <code>dst</code> argument refer to
     *     arrays whose component types are different primitive types.
     * <li>The <code>src</code> argument refers to an array with a primitive
     *     component type and the <code>dst</code> argument refers to an array
     *     with a reference component type.
     * <li>The <code>src</code> argument refers to an array with a reference
     *     component type and the <code>dst</code> argument refers to an array
     *     with a primitive component type.
     * </ul>
     * <p>
     * Otherwise, if any of the following is true, an
     * <code>IndexOutOfBoundsException</code> is
     * thrown and the destination is not modified:
     * <ul>
     * <li>The <code>srcOffset</code> argument is negative.
     * <li>The <code>dstOffset</code> argument is negative.
     * <li>The <code>length</code> argument is negative.
     * <li><code>srcOffset+length</code> is greater than
     *     <code>src.length</code>, the length of the source array.
     * <li><code>dstOffset+length</code> is greater than
     *     <code>dst.length</code>, the length of the destination array.
     * </ul>
     * <p>
     * Otherwise, if any actual component of the source array from
     * position <code>srcOffset</code> through
     * <code>srcOffset+length-1</code> cannot be converted to the component
     * type of the destination array by assignment conversion, an
     * <code>ArrayStoreException</code> is thrown. In this case, let
     * <b><i>k</i></b> be the smallest nonnegative integer less than
     * length such that <code>src[srcOffset+</code><i>k</i><code>]</code>
     * cannot be converted to the component type of the destination
     * array; when the exception is thrown, source array components from
     * positions <code>srcOffset</code> through
     * <code>srcOffset+</code><i>k</i><code>-1</code>
     * will already have been copied to destination array positions
     * <code>dstOffset</code> through
     * <code>dstOffset+</code><i>k</I><code>-1</code> and no other
     * positions of the destination array will have been modified.
     * (Because of the restrictions already itemized, this
     * paragraph effectively applies only to the situation where both
     * arrays have component types that are reference types.)
     *
     * @param      src          the source array.
     * @param      src_position start position in the source array.
     * @param      dst          the destination array.
     * @param      dst_position pos   start position in the destination data.
     * @param      length       the number of array elements to be copied.
     * @exception  IndexOutOfBoundsException  if copying would cause
     *               access of data outside array bounds.
     * @exception  ArrayStoreException  if an element in the <code>src</code>
     *               array could not be stored into the <code>dest</code> array
     *               because of a type mismatch.
     * @exception  NullPointerException if either <code>src</code> or
     *               <code>dst</code> is <code>null</code>.
     */
    private void arraycopy( byte[] src, int src_position,
                            byte[] dst, int dst_position,
                            int length )
    {
        System.arraycopy( src, src_position, dst, dst_position, length );
    }
    /**
     * @return the unfinished string
     */
    String getUnfinishedString()
    {
        return unfinishedString;
    }
    /**
     * @return the total length of the current string
     */
    int getTotalLength()
    {
        return totalLengthBytes;
    }
    /**
     * @return offset into current string data
     */
    int getStringDataOffset()
    {
        return stringDataOffset;
    }
    /**
     * @return true if current string uses wide characters
     */
    boolean isWideChar()
    {
        return wideChar;
--- a/src/java/org/apache/poi/hssf/record/SSTRecord.java
+++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java
@ -478,7 +478,7 @@ public class SSTRecord
        field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
        field_3_strings = new BinaryTree();
        deserializer = new SSTDeserializer(field_3_strings);
-        deserializer.manufactureStrings( data, 8 + offset, size );
+        deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
    }
--- a/src/java/org/apache/poi/hssf/record/UnicodeString.java
+++ b/src/java/org/apache/poi/hssf/record/UnicodeString.java
@ -79,6 +79,7 @@ public class UnicodeString
    private byte              field_2_optionflags;   // = 0;
    private String            field_3_string;        // = null;
    private final int RICH_TEXT_BIT = 8;
    private final int EXT_BIT = 4;
    public UnicodeString()
    {
@ -364,4 +365,9 @@ public class UnicodeString
        return rval;
    }
    public boolean isExtendedText()
    {
        return (getOptionFlags() & EXT_BIT) != 0;
    }
 }
--- a/src/java/org/apache/poi/util/HexDump.java
+++ b/src/java/org/apache/poi/util/HexDump.java
@ -81,6 +81,7 @@ public class HexDump
     * @param stream the OutputStream to which the data is to be
     *               written
     * @param index initial index into the byte array
     * @param length number of characters to output
     *
     * @exception IOException is thrown if anything goes wrong writing
     *            the data to stream
@ -89,9 +90,8 @@ public class HexDump
     * @exception IllegalArgumentException if the output stream is
     *            null
     */
    public synchronized static void dump(final byte [] data, final long offset,
-                            final OutputStream stream, final int index)
+                            final OutputStream stream, final int index, final int length)
            throws IOException, ArrayIndexOutOfBoundsException,
                    IllegalArgumentException
    {
@ -108,9 +108,11 @@ public class HexDump
        long         display_offset = offset + index;
        StringBuffer buffer         = new StringBuffer(74);
-        for (int j = index; j < data.length; j += 16)
+
        int data_length = Math.min(data.length,index+length);
        for (int j = index; j < data_length; j += 16)
        {
-            int chars_read = data.length - j;
+            int chars_read = data_length - j;
            if (chars_read > 16)
            {
@ -146,6 +148,32 @@ public class HexDump
            buffer.setLength(0);
            display_offset += chars_read;
        }
    }
    /**
     * dump an array of bytes to an OutputStream
     *
     * @param data the byte array to be dumped
     * @param offset its offset, whatever that might mean
     * @param stream the OutputStream to which the data is to be
     *               written
     * @param index initial index into the byte array
     *
     * @exception IOException is thrown if anything goes wrong writing
     *            the data to stream
     * @exception ArrayIndexOutOfBoundsException if the index is
     *            outside the data array's bounds
     * @exception IllegalArgumentException if the output stream is
     *            null
     */
    public synchronized static void dump(final byte [] data, final long offset,
                            final OutputStream stream, final int index)
        throws IOException, ArrayIndexOutOfBoundsException,
                IllegalArgumentException
    {
        dump(data, offset, stream, index, data.length-index);
    }
    public static final String        EOL         =
--- a/src/java/org/apache/poi/util/LittleEndian.java
+++ b/src/java/org/apache/poi/util/LittleEndian.java
@ -236,13 +236,27 @@ public class LittleEndian
     *
     * @exception ArrayIndexOutOfBoundsException may be thrown
     */
    public static void putShort(final byte[] data, final int offset,
                                final short value)
    {
        putNumber(data, offset, value, SHORT_SIZE);
    }
    /**
     * put an unsigned short value into a byte array
     *
     * @param data the byte array
     * @param offset a starting offset into the byte array
     * @param value the short (16-bit) value
     *
     * @exception ArrayIndexOutOfBoundsException may be thrown
     */
    public static void putUShort(final byte[] data, final int offset,
                                final int value)
    {
        putNumber(data, offset, value, SHORT_SIZE);
    }
    /**
     * put a array of shorts into a byte array
     *
--- a/src/testcases/org/apache/poi/hssf/data/evencontinuation.txt
+++ b/src/testcases/org/apache/poi/hssf/data/evencontinuation.txt
@ -0,0 +1,16 @@
 14 00                                               # String length 0x14=20
 01                                                  # Option flag, 16bit
 # String: At a dinner party or
 41 00 74 00 20 00 61 00 20 00
 64 00 69 00 6E 00 6E 00 65 00
 72 00 20 00 70 00 61 00 72 00
 74 00 79 00 20 00 6F 00 72 00
 # Continuation record (new string on the boundry)
 11 00                                               # String length 0x11=17
 00                                                  # Option flag, 8bit
 # String: At a dinner party
 41 74 20 61 20
 64 69 6E 6E 65
 72 20 70 61 72
 74 79
--- a/src/testcases/org/apache/poi/hssf/data/richtextdata.txt
+++ b/src/testcases/org/apache/poi/hssf/data/richtextdata.txt
@ -0,0 +1,21 @@
 1D 00                                               # String length 0x1b=29
 09                                                  # Option flag, rich text + 16bit
 02 00                                               # Formatting runs
 # String: At a dinner party or
 41 00 74 00 20 00 61 00 20 00
 64 00 69 00 6E 00 6E 00 65 00
 72 00 20 00 70 00 61 00 72 00
 74 00 79 00 20 00 6F 00 72 00
 # Continuation record
 00                                                  # option flag
 # string:at at at
 41 74 20
 41 74 20
 41 74 20
 00 00                                               # Formatting run 1, first formated char at 0
 00 00                                               # Formatting run 1, Index to font record
 02 00                                               # Formatting run 2, first formated char at 2
 00 00                                               # Formatting run 2, Index to font record
--- a/src/testcases/org/apache/poi/hssf/data/stringacross2continuations.txt
+++ b/src/testcases/org/apache/poi/hssf/data/stringacross2continuations.txt
@ -0,0 +1,7 @@
 14 00                                               # String length 0x14=20
 01                                                  # Option flag, 16bit
 # String: At a dinner party or
 41 00 74 00 20 00 61 00 20 00
 64 00 69 00 6E 00 6E 00 65 00
 72 00 20 00 70 00 61 00 72 00
 74 00 79 00 20 00 6F 00 72 00
--- a/src/testcases/org/apache/poi/hssf/data/stringacross2continuationsCR1.txt
+++ b/src/testcases/org/apache/poi/hssf/data/stringacross2continuationsCR1.txt
@ -0,0 +1,9 @@
 # Continuation record
 22 00                                               # String length 0x11=17
 00                                                  # Option flag, 8bit
 # String: At a dinner party
 41 74 20 61 20
 64 69 6E 6E 65
 72 20 70 61 72
 74 79
--- a/src/testcases/org/apache/poi/hssf/data/stringacross2continuationsCR2.txt
+++ b/src/testcases/org/apache/poi/hssf/data/stringacross2continuationsCR2.txt
@ -0,0 +1,7 @@
 # Continuation record
 00                                                  # option flag
 # String: At a dinner party
 41 74 20 61 20
 64 69 6E 6E 65
 72 20 70 61 72
 74 79
--- a/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java
+++ b/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java
@ -55,19 +55,20 @@
 package org.apache.poi.hssf.record;
 import junit.framework.TestCase;
 import org.apache.poi.hssf.usermodel.HSSFSheet;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.util.BinaryTree;
 import org.apache.poi.util.HexRead;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.LittleEndianConsts;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.hssf.usermodel.HSSFSheet;
 import java.io.*;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 /**
 * @author Marc Johnson (mjohnson at apache dot org)
 * @author Glen Stampoultzis (glens at apache.org)
 */
 public class TestSSTRecord
@ -98,14 +99,14 @@ public class TestSSTRecord
    public void testProcessContinueRecord()
            throws IOException
    {
-        byte[] testdata = readTestData( "BigSSTRecord" );
+        byte[] testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord" );
        byte[] input = new byte[testdata.length - 4];
        System.arraycopy( testdata, 4, input, 0, input.length );
        SSTRecord record =
                new SSTRecord( LittleEndian.getShort( testdata, 0 ),
                        LittleEndian.getShort( testdata, 2 ), input );
-        byte[] continueRecord = readTestData( "BigSSTRecordCR" );
+        byte[] continueRecord = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecordCR" );
        input = new byte[continueRecord.length - 4];
        System.arraycopy( continueRecord, 4, input, 0, input.length );
@ -141,42 +142,42 @@ public class TestSSTRecord
        assertEquals( record, testRecord );
        // testing based on new bug report
-        testdata = readTestData( "BigSSTRecord2" );
+        testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2" );
        input = new byte[testdata.length - 4];
        System.arraycopy( testdata, 4, input, 0, input.length );
        record = new SSTRecord( LittleEndian.getShort( testdata, 0 ),
                LittleEndian.getShort( testdata, 2 ), input );
-        byte[] continueRecord1 = readTestData( "BigSSTRecord2CR1" );
+        byte[] continueRecord1 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR1" );
        input = new byte[continueRecord1.length - 4];
        System.arraycopy( continueRecord1, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord2 = readTestData( "BigSSTRecord2CR2" );
+        byte[] continueRecord2 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR2" );
        input = new byte[continueRecord2.length - 4];
        System.arraycopy( continueRecord2, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord3 = readTestData( "BigSSTRecord2CR3" );
+        byte[] continueRecord3 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR3" );
        input = new byte[continueRecord3.length - 4];
        System.arraycopy( continueRecord3, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord4 = readTestData( "BigSSTRecord2CR4" );
+        byte[] continueRecord4 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR4" );
        input = new byte[continueRecord4.length - 4];
        System.arraycopy( continueRecord4, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord5 = readTestData( "BigSSTRecord2CR5" );
+        byte[] continueRecord5 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR5" );
        input = new byte[continueRecord5.length - 4];
        System.arraycopy( continueRecord5, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord6 = readTestData( "BigSSTRecord2CR6" );
+        byte[] continueRecord6 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR6" );
        input = new byte[continueRecord6.length - 4];
        System.arraycopy( continueRecord6, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord7 = readTestData( "BigSSTRecord2CR7" );
+        byte[] continueRecord7 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR7" );
        input = new byte[continueRecord7.length - 4];
        System.arraycopy( continueRecord7, 4, input, 0, input.length );
@ -208,6 +209,7 @@ public class TestSSTRecord
        }
        assertEquals( offset, ser_output.length );
        assertEquals( record, testRecord );
        assertEquals( record.countStrings(), testRecord.countStrings() );
    }
    /**
@ -333,7 +335,6 @@ public class TestSSTRecord
     *
     * @exception IOException
     */
    public void testSSTRecordBug()
            throws IOException
    {
@ -366,7 +367,6 @@ public class TestSSTRecord
    /**
     * test simple addString
     */
    public void testSimpleAddString()
    {
        SSTRecord record = new SSTRecord();
@ -420,7 +420,7 @@ public class TestSSTRecord
    public void testReaderConstructor()
            throws IOException
    {
-        byte[] testdata = readTestData( "BigSSTRecord" );
+        byte[] testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord" );
        byte[] input = new byte[testdata.length - 4];
        System.arraycopy( testdata, 4, input, 0, input.length );
@ -431,11 +431,11 @@ public class TestSSTRecord
        assertEquals( 1464, record.getNumStrings() );
        assertEquals( 688, record.getNumUniqueStrings() );
        assertEquals( 492, record.countStrings() );
-        assertEquals( 1, record.getDeserializer().getExpectedChars() );
+        assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() );
        assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai",
                record.getDeserializer().getUnfinishedString() );
-        assertEquals( 52, record.getDeserializer().getTotalLength() );
+//        assertEquals( 52, record.getDeserializer().getTotalLength() );
-        assertEquals( 3, record.getDeserializer().getStringDataOffset() );
+//        assertEquals( 3, record.getDeserializer().getStringDataOffset() );
        assertTrue( !record.getDeserializer().isWideChar() );
    }
@ -450,10 +450,10 @@ public class TestSSTRecord
        assertEquals( 0, record.getNumStrings() );
        assertEquals( 0, record.getNumUniqueStrings() );
        assertEquals( 0, record.countStrings() );
-        assertEquals( 0, record.getDeserializer().getExpectedChars() );
+        assertEquals( 0, record.getDeserializer().getContinuationExpectedChars() );
        assertEquals( "", record.getDeserializer().getUnfinishedString() );
-        assertEquals( 0, record.getDeserializer().getTotalLength() );
+//        assertEquals( 0, record.getDeserializer().getTotalLength() );
-        assertEquals( 0, record.getDeserializer().getStringDataOffset() );
+//        assertEquals( 0, record.getDeserializer().getStringDataOffset() );
        assertTrue( !record.getDeserializer().isWideChar() );
        byte[] output = record.serialize();
        byte[] expected =
@ -482,99 +482,6 @@ public class TestSSTRecord
        junit.textui.TestRunner.run( TestSSTRecord.class );
    }
    private byte[] readTestData( String filename )
            throws IOException
    {
        File file = new File( _test_file_path
                + File.separator
                + filename );
        FileInputStream stream = new FileInputStream( file );
        int characterCount = 0;
        byte b = (byte) 0;
        List bytes = new ArrayList();
        boolean done = false;
        while ( !done )
        {
            int count = stream.read();
            switch ( count )
            {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    b <<= 4;
                    b += (byte) ( count - '0' );
                    characterCount++;
                    if ( characterCount == 2 )
                    {
                        bytes.add( new Byte( b ) );
                        characterCount = 0;
                        b = (byte) 0;
                    }
                    break;
                case 'A':
                case 'B':
                case 'C':
                case 'D':
                case 'E':
                case 'F':
                    b <<= 4;
                    b += (byte) ( count + 10 - 'A' );
                    characterCount++;
                    if ( characterCount == 2 )
                    {
                        bytes.add( new Byte( b ) );
                        characterCount = 0;
                        b = (byte) 0;
                    }
                    break;
                case 'a':
                case 'b':
                case 'c':
                case 'd':
                case 'e':
                case 'f':
                    b <<= 4;
                    b += (byte) ( count + 10 - 'a' );
                    characterCount++;
                    if ( characterCount == 2 )
                    {
                        bytes.add( new Byte( b ) );
                        characterCount = 0;
                        b = (byte) 0;
                    }
                    break;
                case -1:
                    done = true;
                    break;
                default :
                    break;
            }
        }
        stream.close();
        Byte[] polished = (Byte[]) bytes.toArray( new Byte[0] );
        byte[] rval = new byte[polished.length];
        for ( int j = 0; j < polished.length; j++ )
        {
            rval[j] = polished[j].byteValue();
        }
        return rval;
    }
    /**
     * Tests that workbooks with rich text that duplicates a non rich text cell can be read and written.
     */
@ -616,4 +523,52 @@ public class TestSSTRecord
        outStream.close();
        file.delete();
    }
    public void testSpanRichTextToPlainText()
            throws Exception
    {
        byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "richtextdata.txt" );
        BinaryTree strings = new BinaryTree();
        SSTDeserializer deserializer = new SSTDeserializer( strings );
        deserializer.manufactureStrings( bytes, 0, (short) 45 );
        byte[] continueBytes = new byte[bytes.length - 45];
        System.arraycopy( bytes, 45, continueBytes, 0, bytes.length - 45 );
        deserializer.processContinueRecord( continueBytes );
 //        System.out.println( "strings.getKeyForValue(new Integer(0)) = " + strings.get( new Integer( 0 ) ) );
        assertEquals( "At a dinner party orAt At At ", strings.get( new Integer( 0 ) ) + "" );
    }
    public void testContinuationWithNoOverlap()
            throws Exception
    {
        byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "evencontinuation.txt" );
        BinaryTree strings = new BinaryTree();
        SSTDeserializer deserializer = new SSTDeserializer( strings );
        deserializer.manufactureStrings( bytes, 0, (short) 43 );
        byte[] continueBytes = new byte[bytes.length - 43];
        System.arraycopy( bytes, 43, continueBytes, 0, bytes.length - 43 );
        deserializer.processContinueRecord( continueBytes );
        assertEquals( "At a dinner party or", strings.get( new Integer( 0 ) ) + "" );
        assertEquals( "At a dinner party", strings.get( new Integer( 1 ) ) + "" );
    }
    public void testStringAcross2Continuations()
            throws Exception
    {
        byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuations.txt" );
        BinaryTree strings = new BinaryTree();
        SSTDeserializer deserializer = new SSTDeserializer( strings );
        deserializer.manufactureStrings( bytes, 0, (short) 43 );
        bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuationsCR1.txt" );
        deserializer.processContinueRecord( bytes );
        bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuationsCR2.txt" );
        deserializer.processContinueRecord( bytes );
        assertEquals( "At a dinner party or", strings.get( new Integer( 0 ) ) + "" );
        assertEquals( "At a dinner partyAt a dinner party", strings.get( new Integer( 1 ) ) + "" );
    }
 }