SST fixed!!! Yay... Will reliably read in spreadsheets that have rich text or extended text. Code is a bit cleaner now but could still use more improvement. If I have the energy I'll look into it.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/branches/REL_1_5_BRANCH@352663 13f79535-47bb-0310-9956-ffa450edef68
2002-06-09 12:33:26 +00:00 · 2002-06-09 12:33:26 +00:00 · d7672fa259
parent 8a4d120c34
commit d7672fa259
13 changed files with 1113 additions and 846 deletions
--- a/module.xml
+++ b/module.xml
@ -68,8 +68,8 @@
  
     <version major="1"
             minor="5"   
-             fix  ="0"   
-             tag="dev"/> 
+	     fix  ="1"   
+	     tag="dev"/>
             
    <package>org.apache.poi</package>

--- a/src/java/org/apache/poi/hssf/record/SSTDeserializer.java
+++ b/src/java/org/apache/poi/hssf/record/SSTDeserializer.java
@ -1,12 +1,68 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache POI" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache POI", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
 package org.apache.poi.hssf.record;

+import org.apache.poi.util.BinaryTree;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.LittleEndianConsts;
-import org.apache.poi.util.BinaryTree;
-import org.apache.poi.util.HexDump;
-
-import java.io.IOException;

+/**
+ * Handles the task of deserializing a SST string.  The two main entry points are manufactureStrings and processContinueRecord.
+ *
+ * @author Glen Stampoultzis (glens at apache.org)
+ */
 class SSTDeserializer
 {

@ -15,22 +71,35 @@ class SSTDeserializer
    private int continuationExpectedChars;
    /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
    private String unfinishedString;
-    /** this is the total length of the current string being handled */
-    private int totalLengthBytes;
-    /** this is the offset into a string field of the actual string data */
-    private int stringDataOffset;
    /** this is true if the string uses wide characters */
    private boolean wideChar;
+    /** this is true if the string is a rich text string */
+    private boolean richText;
+    /** this is true if the string is a far east string or some other weird string */
+    private boolean extendedText;
+    /** Number of formatting runs in this rich text field */
+    private short runCount;
+    /** Number of characters in current string */
+    private int charCount;
+    private int extensionLength;


-    public SSTDeserializer(BinaryTree strings)
+    public SSTDeserializer( BinaryTree strings )
    {
        this.strings = strings;
-        setExpectedChars( 0 );
+        initVars();
+    }
+
+    private void initVars()
+    {
+        runCount = 0;
+        continuationExpectedChars = 0;
        unfinishedString = "";
-        totalLengthBytes = 0;
-        stringDataOffset = 0;
+//        bytesInCurrentSegment = 0;
+//        stringDataOffset = 0;
        wideChar = false;
+        richText = false;
+        extendedText = false;
    }

    /**
@ -38,14 +107,14 @@ class SSTDeserializer
     * strings may span across multiple continuations. Read the SST record
     * carefully before beginning to hack.
     */
-    public void manufactureStrings( final byte[] data, final int index,
-                                     short size )
+    public void manufactureStrings( final byte[] data, final int initialOffset, short dataSize )
    {
-        int offset = index;
+        initVars();

-        while ( offset < size )
+        int offset = initialOffset;
+        while ( ( offset - initialOffset ) < dataSize )
        {
-            int remaining = size - offset;
+            int remaining = dataSize - offset + initialOffset;

            if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) )
            {
@ -53,90 +122,38 @@ class SSTDeserializer
            }
            if ( remaining == LittleEndianConsts.SHORT_SIZE )
            {
-                setExpectedChars( LittleEndian.getUShort( data, offset ) );
+                setContinuationExpectedChars( LittleEndian.getUShort( data, offset ) );
                unfinishedString = "";
                break;
            }
-            short charCount = LittleEndian.getShort( data, offset );
-
-            setupStringParameters( data, offset, charCount );
-            if ( remaining < totalLengthBytes )
+            charCount = LittleEndian.getUShort( data, offset );
+            readStringHeader( data, offset );
+            boolean stringContinuesOverContinuation = remaining < totalStringSize();
+            if ( stringContinuesOverContinuation )
            {
-                setExpectedChars( calculateCharCount( totalLengthBytes - remaining ) );
-                charCount -= getExpectedChars();
-                totalLengthBytes = remaining;
+                int remainingBytes = ( initialOffset + dataSize ) - offset - stringHeaderOverhead();
+                setContinuationExpectedChars( charCount - calculateCharCount( remainingBytes ) );
+                charCount -= getContinuationExpectedChars();
            }
            else
            {
-                setExpectedChars( 0 );
+                setContinuationExpectedChars( 0 );
            }
            processString( data, offset, charCount );
-            offset += totalLengthBytes;
-            if ( getExpectedChars() != 0 )
+            offset += totalStringSize();
+            if ( getContinuationExpectedChars() != 0 )
            {
                break;
            }
        }
    }

-
-    /**
-     * Detemines the option types for the string (ie, compressed or uncompressed unicode, rich text string or
-     * plain string etc) and calculates the length and offset for the string.
-     *
-     * @param data
-     * @param index
-     * @param char_count
-     */
-    private void setupStringParameters( final byte[] data, final int index,
-                                        final int char_count )
-    {
-        byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
-
-        wideChar = ( optionFlag & 1 ) == 1;
-        boolean farEast = ( optionFlag & 4 ) == 4;
-        boolean richText = ( optionFlag & 8 ) == 8;
-
-        totalLengthBytes = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
-        stringDataOffset = SSTRecord.STRING_MINIMAL_OVERHEAD;
-        if ( richText )
-        {
-            short run_count = LittleEndian.getShort( data, index + stringDataOffset );
-
-            stringDataOffset += LittleEndianConsts.SHORT_SIZE;
-            totalLengthBytes += LittleEndianConsts.SHORT_SIZE + ( LittleEndianConsts.INT_SIZE * run_count );
-        }
-        if ( farEast )
-        {
-            int extension_length = LittleEndian.getInt( data, index + stringDataOffset );
-
-            stringDataOffset += LittleEndianConsts.INT_SIZE;
-            totalLengthBytes += LittleEndianConsts.INT_SIZE + extension_length;
-        }
-    }
-
-
-    private void processString( final byte[] data, final int index,
-                                final short char_count )
-    {
-        byte[] stringDataBuffer = new byte[totalLengthBytes];
-        int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
-        byte[] bstring = new byte[length];
-
-        System.arraycopy( data, index, stringDataBuffer, 0, stringDataBuffer.length );
-        int offset = 0;
-
-        LittleEndian.putShort( bstring, offset, char_count );
-        offset += LittleEndianConsts.SHORT_SIZE;
-        bstring[offset] = stringDataBuffer[offset];
-
-//        System.out.println( "offset = " + stringDataOffset );
-//        System.out.println( "length = " + (bstring.length - STRING_MINIMAL_OVERHEAD) );
-//        System.out.println( "src.length = " + str_data.length );
+//    private void dump( final byte[] data, int offset, int length )
+//    {
 //        try
 //        {
-//            System.out.println( "----------------------- DUMP -------------------------" );
-//            HexDump.dump( stringDataBuffer, (long)stringDataOffset, System.out, 1);
+//            System.out.println( "------------------- SST DUMP -------------------------" );
+//            HexDump.dump( (byte[]) data, offset, System.out, offset, length );
 //        }
 //        catch ( IOException e )
 //        {
@ -147,56 +164,116 @@ class SSTDeserializer
 //        catch ( IllegalArgumentException e )
 //        {
 //        }
-        System.arraycopy( stringDataBuffer, stringDataOffset, bstring,
-                SSTRecord.STRING_MINIMAL_OVERHEAD,
-                bstring.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
-        UnicodeString string = new UnicodeString( UnicodeString.sid,
-                (short) bstring.length,
-                bstring );
+//    }

-        if ( getExpectedChars() != 0 )
+    /**
+     * Determines the option types for the string (i.e., compressed or uncompressed unicode, rich text string or
+     * plain string etc) and reads the formatting run count and extension length from the string header.
+     *
+     */
+    private void readStringHeader( final byte[] data, final int index )
+    {
+
+        byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
+
+        wideChar = ( optionFlag & 1 ) == 1;
+        extendedText = ( optionFlag & 4 ) == 4;
+        richText = ( optionFlag & 8 ) == 8;
+        runCount = 0;
+        if ( richText )
        {
-            unfinishedString = string.getString();
+            runCount = LittleEndian.getShort( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD );
        }
-        else
+        extensionLength = 0;
+        if ( extendedText )
+        {
+            extensionLength = LittleEndian.getInt( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD );
+        }
+
+    }
+
+
+    /**
+     * Reads a string or the first part of a string.
+     *
+     * @param characters the number of characters to write.
+     *
+     * @return the number of bytes written.
+     */
+    private int processString( final byte[] data, final int dataIndex, final int characters )
+    {
+
+        // length is the length we store it as.  not the length that is read.
+        int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( characters );
+        byte[] unicodeStringBuffer = new byte[length];
+
+        int offset = 0;
+
+        // Set the length in characters
+        LittleEndian.putUShort( unicodeStringBuffer, offset, characters );
+        offset += LittleEndianConsts.SHORT_SIZE;
+        // Set the option flags
+        unicodeStringBuffer[offset] = data[dataIndex + offset];
+        // Copy in the string data
+        int bytesRead = unicodeStringBuffer.length - SSTRecord.STRING_MINIMAL_OVERHEAD;
+        arraycopy( data, dataIndex + stringHeaderOverhead(), unicodeStringBuffer, SSTRecord.STRING_MINIMAL_OVERHEAD, bytesRead );
+        // Create the unicode string
+        UnicodeString string = new UnicodeString( UnicodeString.sid,
+                (short) unicodeStringBuffer.length,
+                unicodeStringBuffer );
+
+        if ( isStringFinished() )
        {
            Integer integer = new Integer( strings.size() );
            addToStringTable( strings, integer, string );
        }
+        else
+        {
+            unfinishedString = string.getString();
+        }
+
+        return bytesRead;
+    }
+
+    private boolean isStringFinished()
+    {
+        return getContinuationExpectedChars() == 0;
    }

    /**
     * Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
-     * we end up getting duplicate strings.  To get around this I'm doing do things: 1. Converting rich
+     * we end up getting duplicate strings.  To get around this I'm doing two things: 1. Converting rich
     * text to normal text and 2. If there's a duplicate I'm adding a space onto the end.  Sneaky perhaps
     * but it gets the job done until we can handle this a little better.
     */
    static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )
    {
-        if (string.isRichText())
-            string.setOptionFlags( (byte)(string.getOptionFlags() & (~8) ) );
+
+        if ( string.isRichText() )
+            string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
+        if ( string.isExtendedText() )
+            string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~4 ) ) );

        boolean added = false;
-        while (added == false)
+        while ( added == false )
        {
            try
            {
                strings.put( integer, string );
                added = true;
            }
-            catch( Exception ignore )
+            catch ( Exception ignore )
            {
                string.setString( string.getString() + " " );
            }
        }
-    }

+    }


    private int calculateCharCount( final int byte_count )
    {
-        return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE
-                : LittleEndianConsts.BYTE_SIZE );
+        return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE );
    }

    /**
@ -219,81 +296,129 @@ class SSTDeserializer
     *
     * @param record the Continue record's byte data
     */
-
    public void processContinueRecord( final byte[] record )
    {
-        if ( getExpectedChars() == 0 )
+        if ( isStringFinished() )
        {
-            unfinishedString = "";
-            totalLengthBytes = 0;
-            stringDataOffset = 0;
-            wideChar = false;
+            initVars();
            manufactureStrings( record, 0, (short) record.length );
        }
        else
        {
-            int data_length = record.length - LittleEndianConsts.BYTE_SIZE;
+            // reset the wide bit because that can change across a continuation. the fact that it's
+            // actually rich text doesn't change across continuations even though the rich text
+            // bit may no longer be set in the "new" option flag.  confusing huh?
+            wideChar = ( record[0] & 1 ) == 1;

-            if ( calculateByteCount( getExpectedChars() ) > data_length )
+            if ( stringSpansContinuation( record.length - LittleEndianConsts.BYTE_SIZE ) )
            {
-
-                // create artificial data to create a UnicodeString
-                byte[] input =
-                        new byte[record.length + LittleEndianConsts.SHORT_SIZE];
-                short size = (short) ( ( ( record[0] & 1 ) == 1 )
-                        ? ( data_length / LittleEndianConsts.SHORT_SIZE )
-                        : ( data_length / LittleEndianConsts.BYTE_SIZE ) );
-
-                LittleEndian.putShort( input, (byte) 0, size );
-                System.arraycopy( record, 0, input, LittleEndianConsts.SHORT_SIZE, record.length );
-                UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) input.length, input );
-
-                unfinishedString = unfinishedString + ucs.getString();
-                setExpectedChars( getExpectedChars() - size );
+                processEntireContinuation( record );
            }
            else
            {
-                setupStringParameters( record, -LittleEndianConsts.SHORT_SIZE,
-                        getExpectedChars() );
-                byte[] str_data = new byte[totalLengthBytes];
-                int length = SSTRecord.STRING_MINIMAL_OVERHEAD
-                        + ( calculateByteCount( getExpectedChars() ) );
-                byte[] bstring = new byte[length];
-
-                // Copy data from the record into the string
-                // buffer. Copy skips the length of a short in the
-                // string buffer, to leave room for the string length.
-                System.arraycopy( record, 0, str_data,
-                        LittleEndianConsts.SHORT_SIZE,
-                        str_data.length
-                        - LittleEndianConsts.SHORT_SIZE );
-
-                // write the string length
-                LittleEndian.putShort( bstring, 0,
-                        (short) getExpectedChars() );
-
-                // write the options flag
-                bstring[LittleEndianConsts.SHORT_SIZE] =
-                        str_data[LittleEndianConsts.SHORT_SIZE];
-
-                // copy the bytes/words making up the string; skipping
-                // past all the overhead of the str_data array
-                System.arraycopy( str_data, stringDataOffset, bstring,
-                        SSTRecord.STRING_MINIMAL_OVERHEAD,
-                        bstring.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
-
-                // use special constructor to create the final string
-                UnicodeString string =
-                        new UnicodeString( UnicodeString.sid,
-                                (short) bstring.length, bstring,
-                                unfinishedString );
-                Integer integer = new Integer( strings.size() );
-
-//                field_3_strings.put( integer, string );
-                addToStringTable( strings, integer, string );
-                manufactureStrings( record, totalLengthBytes - LittleEndianConsts.SHORT_SIZE, (short) record.length );
+                readStringRemainder( record );
            }
        }
+
+    }
+
+    /**
+     * Reads the remainder string and any subsequent strings from the continuation record.
+     *
+     * @param record  The entire continuation record data.
+     */
+    private void readStringRemainder( final byte[] record )
+    {
+        int stringRemainderSizeInBytes = calculateByteCount( getContinuationExpectedChars() );
+//        stringDataOffset = LittleEndianConsts.BYTE_SIZE;
+        byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD
+                + calculateByteCount( getContinuationExpectedChars() )];
+
+        // write the string length
+        LittleEndian.putShort( unicodeStringData, 0, (short) getContinuationExpectedChars() );
+
+        // write the options flag
+        unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText );
+
+        // copy the bytes/words making up the string; skipping
+        // past all the overhead of the str_data array
+        arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData,
+                SSTRecord.STRING_MINIMAL_OVERHEAD,
+                unicodeStringData.length - SSTRecord.STRING_MINIMAL_OVERHEAD );
+
+        // use special constructor to create the final string
+        UnicodeString string = new UnicodeString( UnicodeString.sid,
+                (short) unicodeStringData.length, unicodeStringData,
+                unfinishedString );
+        Integer integer = new Integer( strings.size() );
+
+        addToStringTable( strings, integer, string );
+
+        int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes );
+        manufactureStrings( record, newOffset, (short) ( record.length - newOffset ) );
+    }
+
+    /**
+     * Calculates the size of the string in bytes based on the character width
+     */
+    private int stringSizeInBytes()
+    {
+        return calculateByteCount( charCount );
+    }
+
+    /**
+     * Calculates the size of the string in bytes.  This figure includes all the
+     * overheads for the string.
+     */
+    private int totalStringSize()
+    {
+        return stringSizeInBytes()
+                + stringHeaderOverhead()
+                + LittleEndianConsts.INT_SIZE * runCount
+                + extensionLength;
+    }
+
+    private int stringHeaderOverhead()
+    {
+        return SSTRecord.STRING_MINIMAL_OVERHEAD
+                + ( richText ? LittleEndianConsts.SHORT_SIZE : 0 )
+                + ( extendedText ? LittleEndianConsts.INT_SIZE : 0 );
+    }
+
+    private int offsetForContinuedRecord( int stringRemainderSizeInBytes )
+    {
+        return stringRemainderSizeInBytes + LittleEndianConsts.BYTE_SIZE
+                + runCount * LittleEndianConsts.INT_SIZE + extensionLength;
+    }
+
+    private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast )
+    {
+        return (byte) ( ( wideChar ? 1 : 0 ) + ( farEast ? 4 : 0 ) + ( richText ? 8 : 0 ) );
+    }
+
+    /**
+     * If the continued record is so long that it spans into the next continue then
+     * simply suck the remaining string data into the existing <code>unfinishedString</code>.
+     *
+     * @param record    The data from the continuation record.
+     */
+    private void processEntireContinuation( final byte[] record )
+    {
+        // create artificial data to create a UnicodeString
+        int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE;
+        byte[] unicodeStringData = new byte[record.length + LittleEndianConsts.SHORT_SIZE];
+
+        LittleEndian.putShort( unicodeStringData, (byte) 0, (short) calculateCharCount( dataLengthInBytes ) );
+        arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE, record.length );
+        UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData );
+
+        unfinishedString = unfinishedString + ucs.getString();
+        setContinuationExpectedChars( getContinuationExpectedChars() - calculateCharCount( dataLengthInBytes ) );
+    }
+
+    private boolean stringSpansContinuation( int continuationSizeInBytes )
+    {
+        return calculateByteCount( getContinuationExpectedChars() ) > continuationSizeInBytes;
    }

    /**
@ -301,12 +426,12 @@ class SSTDeserializer
     *         sub-record in a subsequent continuation record
     */

-    int getExpectedChars()
+    int getContinuationExpectedChars()
    {
        return continuationExpectedChars;
    }

-    private void setExpectedChars( final int count )
+    private void setContinuationExpectedChars( final int count )
    {
        continuationExpectedChars = count;
    }
@ -317,37 +442,116 @@ class SSTDeserializer
    }


+    /**
+     * Copies an array from the specified source array, beginning at the
+     * specified position, to the specified position of the destination array.
+     * A subsequence of array components are copied from the source
+     * array referenced by <code>src</code> to the destination array
+     * referenced by <code>dst</code>. The number of components copied is
+     * equal to the <code>length</code> argument. The components at
+     * positions <code>srcOffset</code> through
+     * <code>srcOffset+length-1</code> in the source array are copied into
+     * positions <code>dstOffset</code> through
+     * <code>dstOffset+length-1</code>, respectively, of the destination
+     * array.
+     * <p>
+     * If the <code>src</code> and <code>dst</code> arguments refer to the
+     * same array object, then the copying is performed as if the
+     * components at positions <code>srcOffset</code> through
+     * <code>srcOffset+length-1</code> were first copied to a temporary
+     * array with <code>length</code> components and then the contents of
+     * the temporary array were copied into positions
+     * <code>dstOffset</code> through <code>dstOffset+length-1</code> of the
+     * destination array.
+     * <p>
+     * If <code>dst</code> is <code>null</code>, then a
+     * <code>NullPointerException</code> is thrown.
+     * <p>
+     * If <code>src</code> is <code>null</code>, then a
+     * <code>NullPointerException</code> is thrown and the destination
+     * array is not modified.
+     * <p>
+     * Otherwise, if any of the following is true, an
+     * <code>ArrayStoreException</code> is thrown and the destination is
+     * not modified:
+     * <ul>
+     * <li>The <code>src</code> argument refers to an object that is not an
+     *     array.
+     * <li>The <code>dst</code> argument refers to an object that is not an
+     *     array.
+     * <li>The <code>src</code> argument and <code>dst</code> argument refer to
+     *     arrays whose component types are different primitive types.
+     * <li>The <code>src</code> argument refers to an array with a primitive
+     *     component type and the <code>dst</code> argument refers to an array
+     *     with a reference component type.
+     * <li>The <code>src</code> argument refers to an array with a reference
+     *     component type and the <code>dst</code> argument refers to an array
+     *     with a primitive component type.
+     * </ul>
+     * <p>
+     * Otherwise, if any of the following is true, an
+     * <code>IndexOutOfBoundsException</code> is
+     * thrown and the destination is not modified:
+     * <ul>
+     * <li>The <code>srcOffset</code> argument is negative.
+     * <li>The <code>dstOffset</code> argument is negative.
+     * <li>The <code>length</code> argument is negative.
+     * <li><code>srcOffset+length</code> is greater than
+     *     <code>src.length</code>, the length of the source array.
+     * <li><code>dstOffset+length</code> is greater than
+     *     <code>dst.length</code>, the length of the destination array.
+     * </ul>
+     * <p>
+     * Otherwise, if any actual component of the source array from
+     * position <code>srcOffset</code> through
+     * <code>srcOffset+length-1</code> cannot be converted to the component
+     * type of the destination array by assignment conversion, an
+     * <code>ArrayStoreException</code> is thrown. In this case, let
+     * <b><i>k</i></b> be the smallest nonnegative integer less than
+     * length such that <code>src[srcOffset+</code><i>k</i><code>]</code>
+     * cannot be converted to the component type of the destination
+     * array; when the exception is thrown, source array components from
+     * positions <code>srcOffset</code> through
+     * <code>srcOffset+</code><i>k</i><code>-1</code>
+     * will already have been copied to destination array positions
+     * <code>dstOffset</code> through
+     * <code>dstOffset+</code><i>k</I><code>-1</code> and no other
+     * positions of the destination array will have been modified.
+     * (Because of the restrictions already itemized, this
+     * paragraph effectively applies only to the situation where both
+     * arrays have component types that are reference types.)
+     *
+     * @param      src          the source array.
+     * @param      src_position start position in the source array.
+     * @param      dst          the destination array.
+     * @param      dst_position start position in the destination data.
+     * @param      length       the number of array elements to be copied.
+     * @exception  IndexOutOfBoundsException  if copying would cause
+     *               access of data outside array bounds.
+     * @exception  ArrayStoreException  if an element in the <code>src</code>
+     *               array could not be stored into the <code>dest</code> array
+     *               because of a type mismatch.
+     * @exception  NullPointerException if either <code>src</code> or
+     *               <code>dst</code> is <code>null</code>.
+     */
+    private void arraycopy( byte[] src, int src_position,
+                            byte[] dst, int dst_position,
+                            int length )
+    {
+        System.arraycopy( src, src_position, dst, dst_position, length );
+    }
+
    /**
     * @return the unfinished string
     */
-
    String getUnfinishedString()
    {
        return unfinishedString;
    }

-    /**
-     * @return the total length of the current string
-     */
-
-    int getTotalLength()
-    {
-        return totalLengthBytes;
-    }
-
-    /**
-     * @return offset into current string data
-     */
-
-    int getStringDataOffset()
-    {
-        return stringDataOffset;
-    }
-
    /**
     * @return true if current string uses wide characters
     */
-
    boolean isWideChar()
    {
        return wideChar;
--- a/src/java/org/apache/poi/hssf/record/SSTRecord.java
+++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java
@ -478,7 +478,7 @@ public class SSTRecord
        field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
        field_3_strings = new BinaryTree();
        deserializer = new SSTDeserializer(field_3_strings);
-        deserializer.manufactureStrings( data, 8 + offset, size );
+        deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) );
    }


--- a/src/java/org/apache/poi/hssf/record/UnicodeString.java
+++ b/src/java/org/apache/poi/hssf/record/UnicodeString.java
@ -79,6 +79,7 @@ public class UnicodeString
    private byte              field_2_optionflags;   // = 0;
    private String            field_3_string;        // = null;
    private final int RICH_TEXT_BIT = 8;
+    private final int EXT_BIT = 4;

    public UnicodeString()
    {
@ -364,4 +365,9 @@ public class UnicodeString
        return rval;
    }

+    public boolean isExtendedText()
+    {
+        return (getOptionFlags() & EXT_BIT) != 0;
+    }
+
 }
--- a/src/java/org/apache/poi/util/HexDump.java
+++ b/src/java/org/apache/poi/util/HexDump.java
@ -81,6 +81,7 @@ public class HexDump
     * @param stream the OutputStream to which the data is to be
     *               written
     * @param index initial index into the byte array
+     * @param length number of bytes to output
     *
     * @exception IOException is thrown if anything goes wrong writing
     *            the data to stream
@ -89,11 +90,10 @@ public class HexDump
     * @exception IllegalArgumentException if the output stream is
     *            null
     */
-
    public synchronized static void dump(final byte [] data, final long offset,
-                            final OutputStream stream, final int index)
-        throws IOException, ArrayIndexOutOfBoundsException,
-                IllegalArgumentException
+                            final OutputStream stream, final int index, final int length)
+            throws IOException, ArrayIndexOutOfBoundsException,
+                    IllegalArgumentException
    {
        if ((index < 0) || (index >= data.length))
        {
@ -108,9 +108,11 @@ public class HexDump
        long         display_offset = offset + index;
        StringBuffer buffer         = new StringBuffer(74);

-        for (int j = index; j < data.length; j += 16)
+
+        int data_length = Math.min(data.length,index+length);
+        for (int j = index; j < data_length; j += 16)
        {
-            int chars_read = data.length - j;
+            int chars_read = data_length - j;

            if (chars_read > 16)
            {
@ -146,6 +148,32 @@ public class HexDump
            buffer.setLength(0);
            display_offset += chars_read;
        }
+
+    }
+
+    /**
+     * dump an array of bytes to an OutputStream
+     *
+     * @param data the byte array to be dumped
+     * @param offset base offset; the displayed offset starts at offset + index
+     * @param stream the OutputStream to which the data is to be
+     *               written
+     * @param index initial index into the byte array
+     *
+     * @exception IOException is thrown if anything goes wrong writing
+     *            the data to stream
+     * @exception ArrayIndexOutOfBoundsException if the index is
+     *            outside the data array's bounds
+     * @exception IllegalArgumentException if the output stream is
+     *            null
+     */
+
+    public synchronized static void dump(final byte [] data, final long offset,
+                            final OutputStream stream, final int index)
+        throws IOException, ArrayIndexOutOfBoundsException,
+                IllegalArgumentException
+    {
+        dump(data, offset, stream, index, data.length-index);
    }

    public static final String        EOL         =
--- a/src/java/org/apache/poi/util/LittleEndian.java
+++ b/src/java/org/apache/poi/util/LittleEndian.java
@ -236,13 +236,27 @@ public class LittleEndian
     *
     * @exception ArrayIndexOutOfBoundsException may be thrown
     */
-
    public static void putShort(final byte[] data, final int offset,
                                final short value)
    {
        putNumber(data, offset, value, SHORT_SIZE);
    }

+    /**
+     * put an unsigned short value into a byte array
+     *
+     * @param data the byte array
+     * @param offset a starting offset into the byte array
+     * @param value the unsigned short (16-bit) value, passed as an int
+     *
+     * @exception ArrayIndexOutOfBoundsException may be thrown
+     */
+    public static void putUShort(final byte[] data, final int offset,
+                                final int value)
+    {
+        putNumber(data, offset, value, SHORT_SIZE);
+    }
+
    /**
     * put a array of shorts into a byte array
     *
--- a/src/testcases/org/apache/poi/hssf/data/BigSSTRecord
+++ b/src/testcases/org/apache/poi/hssf/data/BigSSTRecord
--- a/src/testcases/org/apache/poi/hssf/data/evencontinuation.txt
+++ b/src/testcases/org/apache/poi/hssf/data/evencontinuation.txt
@ -0,0 +1,16 @@
+14 00                                               # String length 0x14=20
+01                                                  # Option flag, 16bit
+# String: At a dinner party or
+41 00 74 00 20 00 61 00 20 00
+64 00 69 00 6E 00 6E 00 65 00
+72 00 20 00 70 00 61 00 72 00
+74 00 79 00 20 00 6F 00 72 00
+
+# Continuation record (new string on the boundary)
+11 00                                               # String length 0x11=17
+00                                                  # Option flag, 8bit
+# String: At a dinner party
+41 74 20 61 20
+64 69 6E 6E 65
+72 20 70 61 72
+74 79
--- a/src/testcases/org/apache/poi/hssf/data/richtextdata.txt
+++ b/src/testcases/org/apache/poi/hssf/data/richtextdata.txt
@ -0,0 +1,21 @@
+1D 00                                               # String length 0x1D=29
+09                                                  # Option flag, rich text + 16bit
+02 00                                               # Formatting runs
+# String: At a dinner party or
+41 00 74 00 20 00 61 00 20 00
+64 00 69 00 6E 00 6E 00 65 00
+72 00 20 00 70 00 61 00 72 00
+74 00 79 00 20 00 6F 00 72 00
+
+# Continuation record
+00                                                  # option flag
+
+# String: At At At
+41 74 20
+41 74 20
+41 74 20
+
+00 00                                               # Formatting run 1, first formatted char at 0
+00 00                                               # Formatting run 1, Index to font record
+02 00                                               # Formatting run 2, first formatted char at 2
+00 00                                               # Formatting run 2, Index to font record
--- a/src/testcases/org/apache/poi/hssf/data/stringacross2continuations.txt
+++ b/src/testcases/org/apache/poi/hssf/data/stringacross2continuations.txt
@ -0,0 +1,7 @@
+14 00                                               # String length 0x14=20
+01                                                  # Option flag, 16bit
+# String: At a dinner party or
+41 00 74 00 20 00 61 00 20 00
+64 00 69 00 6E 00 6E 00 65 00
+72 00 20 00 70 00 61 00 72 00
+74 00 79 00 20 00 6F 00 72 00
--- a/src/testcases/org/apache/poi/hssf/data/stringacross2continuationsCR1.txt
+++ b/src/testcases/org/apache/poi/hssf/data/stringacross2continuationsCR1.txt
@ -0,0 +1,9 @@
+
+# Continuation record
+22 00                                               # String length 0x22=34
+00                                                  # Option flag, 8bit
+# String: At a dinner party
+41 74 20 61 20
+64 69 6E 6E 65
+72 20 70 61 72
+74 79
--- a/src/testcases/org/apache/poi/hssf/data/stringacross2continuationsCR2.txt
+++ b/src/testcases/org/apache/poi/hssf/data/stringacross2continuationsCR2.txt
@ -0,0 +1,7 @@
+# Continuation record
+00                                                  # option flag
+# String: At a dinner party (remaining 17 characters of the string started in the previous record)
+41 74 20 61 20
+64 69 6E 6E 65
+72 20 70 61 72
+74 79
--- a/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java
+++ b/src/testcases/org/apache/poi/hssf/record/TestSSTRecord.java
@ -55,19 +55,20 @@
 package org.apache.poi.hssf.record;

 import junit.framework.TestCase;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.util.BinaryTree;
+import org.apache.poi.util.HexRead;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.LittleEndianConsts;
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.hssf.usermodel.HSSFSheet;

 import java.io.*;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
-import java.util.List;

 /**
 * @author Marc Johnson (mjohnson at apache dot org)
+ * @author Glen Stampoultzis (glens at apache.org)
 */

 public class TestSSTRecord
@ -98,14 +99,14 @@ public class TestSSTRecord
    public void testProcessContinueRecord()
            throws IOException
    {
-        byte[] testdata = readTestData( "BigSSTRecord" );
+        byte[] testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord" );
        byte[] input = new byte[testdata.length - 4];

        System.arraycopy( testdata, 4, input, 0, input.length );
        SSTRecord record =
                new SSTRecord( LittleEndian.getShort( testdata, 0 ),
                        LittleEndian.getShort( testdata, 2 ), input );
-        byte[] continueRecord = readTestData( "BigSSTRecordCR" );
+        byte[] continueRecord = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecordCR" );

        input = new byte[continueRecord.length - 4];
        System.arraycopy( continueRecord, 4, input, 0, input.length );
@ -141,42 +142,42 @@ public class TestSSTRecord
        assertEquals( record, testRecord );

        // testing based on new bug report
-        testdata = readTestData( "BigSSTRecord2" );
+        testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2" );
        input = new byte[testdata.length - 4];
        System.arraycopy( testdata, 4, input, 0, input.length );
        record = new SSTRecord( LittleEndian.getShort( testdata, 0 ),
                LittleEndian.getShort( testdata, 2 ), input );
-        byte[] continueRecord1 = readTestData( "BigSSTRecord2CR1" );
+        byte[] continueRecord1 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR1" );

        input = new byte[continueRecord1.length - 4];
        System.arraycopy( continueRecord1, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord2 = readTestData( "BigSSTRecord2CR2" );
+        byte[] continueRecord2 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR2" );

        input = new byte[continueRecord2.length - 4];
        System.arraycopy( continueRecord2, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord3 = readTestData( "BigSSTRecord2CR3" );
+        byte[] continueRecord3 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR3" );

        input = new byte[continueRecord3.length - 4];
        System.arraycopy( continueRecord3, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord4 = readTestData( "BigSSTRecord2CR4" );
+        byte[] continueRecord4 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR4" );

        input = new byte[continueRecord4.length - 4];
        System.arraycopy( continueRecord4, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord5 = readTestData( "BigSSTRecord2CR5" );
+        byte[] continueRecord5 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR5" );

        input = new byte[continueRecord5.length - 4];
        System.arraycopy( continueRecord5, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord6 = readTestData( "BigSSTRecord2CR6" );
+        byte[] continueRecord6 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR6" );

        input = new byte[continueRecord6.length - 4];
        System.arraycopy( continueRecord6, 4, input, 0, input.length );
        record.processContinueRecord( input );
-        byte[] continueRecord7 = readTestData( "BigSSTRecord2CR7" );
+        byte[] continueRecord7 = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord2CR7" );

        input = new byte[continueRecord7.length - 4];
        System.arraycopy( continueRecord7, 4, input, 0, input.length );
@ -208,6 +209,7 @@ public class TestSSTRecord
        }
        assertEquals( offset, ser_output.length );
        assertEquals( record, testRecord );
+        assertEquals( record.countStrings(), testRecord.countStrings() );
    }

    /**
@ -333,7 +335,6 @@ public class TestSSTRecord
     *
     * @exception IOException
     */
-
    public void testSSTRecordBug()
            throws IOException
    {
@ -366,7 +367,6 @@ public class TestSSTRecord
    /**
     * test simple addString
     */
-
    public void testSimpleAddString()
    {
        SSTRecord record = new SSTRecord();
@ -420,7 +420,7 @@ public class TestSSTRecord
    public void testReaderConstructor()
            throws IOException
    {
-        byte[] testdata = readTestData( "BigSSTRecord" );
+        byte[] testdata = HexRead.readTestData( _test_file_path + File.separator + "BigSSTRecord" );
        byte[] input = new byte[testdata.length - 4];

        System.arraycopy( testdata, 4, input, 0, input.length );
@ -431,11 +431,11 @@ public class TestSSTRecord
        assertEquals( 1464, record.getNumStrings() );
        assertEquals( 688, record.getNumUniqueStrings() );
        assertEquals( 492, record.countStrings() );
-        assertEquals( 1, record.getDeserializer().getExpectedChars() );
+        assertEquals( 1, record.getDeserializer().getContinuationExpectedChars() );
        assertEquals( "Consolidated B-24J Liberator The Dragon & His Tai",
                record.getDeserializer().getUnfinishedString() );
-        assertEquals( 52, record.getDeserializer().getTotalLength() );
-        assertEquals( 3, record.getDeserializer().getStringDataOffset() );
+//        assertEquals( 52, record.getDeserializer().getTotalLength() );
+//        assertEquals( 3, record.getDeserializer().getStringDataOffset() );
        assertTrue( !record.getDeserializer().isWideChar() );
    }

@ -450,10 +450,10 @@ public class TestSSTRecord
        assertEquals( 0, record.getNumStrings() );
        assertEquals( 0, record.getNumUniqueStrings() );
        assertEquals( 0, record.countStrings() );
-        assertEquals( 0, record.getDeserializer().getExpectedChars() );
+        assertEquals( 0, record.getDeserializer().getContinuationExpectedChars() );
        assertEquals( "", record.getDeserializer().getUnfinishedString() );
-        assertEquals( 0, record.getDeserializer().getTotalLength() );
-        assertEquals( 0, record.getDeserializer().getStringDataOffset() );
+//        assertEquals( 0, record.getDeserializer().getTotalLength() );
+//        assertEquals( 0, record.getDeserializer().getStringDataOffset() );
        assertTrue( !record.getDeserializer().isWideChar() );
        byte[] output = record.serialize();
        byte[] expected =
@ -482,99 +482,6 @@ public class TestSSTRecord
        junit.textui.TestRunner.run( TestSSTRecord.class );
    }

-    private byte[] readTestData( String filename )
-            throws IOException
-    {
-        File file = new File( _test_file_path
-                + File.separator
-                + filename );
-        FileInputStream stream = new FileInputStream( file );
-        int characterCount = 0;
-        byte b = (byte) 0;
-        List bytes = new ArrayList();
-        boolean done = false;
-
-        while ( !done )
-        {
-            int count = stream.read();
-
-            switch ( count )
-            {
-
-                case '0':
-                case '1':
-                case '2':
-                case '3':
-                case '4':
-                case '5':
-                case '6':
-                case '7':
-                case '8':
-                case '9':
-                    b <<= 4;
-                    b += (byte) ( count - '0' );
-                    characterCount++;
-                    if ( characterCount == 2 )
-                    {
-                        bytes.add( new Byte( b ) );
-                        characterCount = 0;
-                        b = (byte) 0;
-                    }
-                    break;
-
-                case 'A':
-                case 'B':
-                case 'C':
-                case 'D':
-                case 'E':
-                case 'F':
-                    b <<= 4;
-                    b += (byte) ( count + 10 - 'A' );
-                    characterCount++;
-                    if ( characterCount == 2 )
-                    {
-                        bytes.add( new Byte( b ) );
-                        characterCount = 0;
-                        b = (byte) 0;
-                    }
-                    break;
-
-                case 'a':
-                case 'b':
-                case 'c':
-                case 'd':
-                case 'e':
-                case 'f':
-                    b <<= 4;
-                    b += (byte) ( count + 10 - 'a' );
-                    characterCount++;
-                    if ( characterCount == 2 )
-                    {
-                        bytes.add( new Byte( b ) );
-                        characterCount = 0;
-                        b = (byte) 0;
-                    }
-                    break;
-
-                case -1:
-                    done = true;
-                    break;
-
-                default :
-                    break;
-            }
-        }
-        stream.close();
-        Byte[] polished = (Byte[]) bytes.toArray( new Byte[0] );
-        byte[] rval = new byte[polished.length];
-
-        for ( int j = 0; j < polished.length; j++ )
-        {
-            rval[j] = polished[j].byteValue();
-        }
-        return rval;
-    }
-
    /**
     * Tests that workbooks with rich text that duplicates a non rich text cell can be read and written.
     */
@ -582,38 +489,86 @@ public class TestSSTRecord
            throws Exception
    {
        File file = new File( _test_file_path + File.separator + "duprich1.xls" );
-        InputStream stream = new FileInputStream(file);
-        HSSFWorkbook wb = new HSSFWorkbook(stream);
+        InputStream stream = new FileInputStream( file );
+        HSSFWorkbook wb = new HSSFWorkbook( stream );
        stream.close();
-        HSSFSheet sheet = wb.getSheetAt(1);
-        assertEquals("01/05 (Wed) ", sheet.getRow(0).getCell((short)8).getStringCellValue());
-        assertEquals("01/05 (Wed)", sheet.getRow(1).getCell((short)8).getStringCellValue());
+        HSSFSheet sheet = wb.getSheetAt( 1 );
+        assertEquals( "01/05 (Wed) ", sheet.getRow( 0 ).getCell( (short) 8 ).getStringCellValue() );
+        assertEquals( "01/05 (Wed)", sheet.getRow( 1 ).getCell( (short) 8 ).getStringCellValue() );

-        file = File.createTempFile("testout", "xls");
-        FileOutputStream outStream = new FileOutputStream(file);
-        wb.write(outStream);
+        file = File.createTempFile( "testout", "xls" );
+        FileOutputStream outStream = new FileOutputStream( file );
+        wb.write( outStream );
        outStream.close();
        file.delete();

        // test the second file.
        file = new File( _test_file_path + File.separator + "duprich2.xls" );
-        stream = new FileInputStream(file);
-        wb = new HSSFWorkbook(stream);
+        stream = new FileInputStream( file );
+        wb = new HSSFWorkbook( stream );
        stream.close();
-        sheet = wb.getSheetAt(0);
+        sheet = wb.getSheetAt( 0 );
        int row = 0;
-        assertEquals("Testing ", sheet.getRow(row++).getCell((short)0).getStringCellValue());
-        assertEquals("rich", sheet.getRow(row++).getCell((short)0).getStringCellValue());
-        assertEquals("text", sheet.getRow(row++).getCell((short)0).getStringCellValue());
-        assertEquals("strings", sheet.getRow(row++).getCell((short)0).getStringCellValue());
-        assertEquals("Testing  ", sheet.getRow(row++).getCell((short)0).getStringCellValue());
-        assertEquals("Testing", sheet.getRow(row++).getCell((short)0).getStringCellValue());
+        assertEquals( "Testing ", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
+        assertEquals( "rich", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
+        assertEquals( "text", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
+        assertEquals( "strings", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
+        assertEquals( "Testing  ", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );
+        assertEquals( "Testing", sheet.getRow( row++ ).getCell( (short) 0 ).getStringCellValue() );

 //        file = new File("/tryme.xls");
-        file = File.createTempFile("testout", ".xls");
-        outStream = new FileOutputStream(file);
-        wb.write(outStream);
+        file = File.createTempFile( "testout", ".xls" );
+        outStream = new FileOutputStream( file );
+        wb.write( outStream );
        outStream.close();
        file.delete();
    }
+
+    public void testSpanRichTextToPlainText()
+            throws Exception
+    {
+        byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "richtextdata.txt" );
+        BinaryTree strings = new BinaryTree();
+        SSTDeserializer deserializer = new SSTDeserializer( strings );
+        deserializer.manufactureStrings( bytes, 0, (short) 45 );
+        byte[] continueBytes = new byte[bytes.length - 45];
+        System.arraycopy( bytes, 45, continueBytes, 0, bytes.length - 45 );
+        deserializer.processContinueRecord( continueBytes );
+//        System.out.println( "strings.getKeyForValue(new Integer(0)) = " + strings.get( new Integer( 0 ) ) );
+
+        assertEquals( "At a dinner party orAt At At ", strings.get( new Integer( 0 ) ) + "" );
+    }
+
+    public void testContinuationWithNoOverlap()
+            throws Exception
+    {
+        byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "evencontinuation.txt" );
+        BinaryTree strings = new BinaryTree();
+        SSTDeserializer deserializer = new SSTDeserializer( strings );
+        deserializer.manufactureStrings( bytes, 0, (short) 43 );
+        byte[] continueBytes = new byte[bytes.length - 43];
+        System.arraycopy( bytes, 43, continueBytes, 0, bytes.length - 43 );
+        deserializer.processContinueRecord( continueBytes );
+
+        assertEquals( "At a dinner party or", strings.get( new Integer( 0 ) ) + "" );
+        assertEquals( "At a dinner party", strings.get( new Integer( 1 ) ) + "" );
+
+    }
+
+    public void testStringAcross2Continuations()
+            throws Exception
+    {
+        byte[] bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuations.txt" );
+        BinaryTree strings = new BinaryTree();
+        SSTDeserializer deserializer = new SSTDeserializer( strings );
+        deserializer.manufactureStrings( bytes, 0, (short) 43 );
+        bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuationsCR1.txt" );
+        deserializer.processContinueRecord( bytes );
+        bytes = HexRead.readTestData( _test_file_path + File.separator + "stringacross2continuationsCR2.txt" );
+        deserializer.processContinueRecord( bytes );
+
+        assertEquals( "At a dinner party or", strings.get( new Integer( 0 ) ) + "" );
+        assertEquals( "At a dinner partyAt a dinner party", strings.get( new Integer( 1 ) ) + "" );
+
+    }
 }