/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache POI" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * "Apache POI", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * . */ package org.apache.poi.hssf.record; import org.apache.poi.util.BinaryTree; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianConsts; /** * Handles the task of deserializing a SST string. The two main entry points are * * @author Glen Stampoultzis (glens at apache.org) */ class SSTDeserializer { private BinaryTree strings; /** this is the number of characters we expect in the first sub-record in a subsequent continuation record */ private int continuationExpectedChars; /** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */ private String unfinishedString; /** this is true if the string uses wide characters */ private boolean wideChar; /** this is true if the string is a rich text string */ private boolean richText; /** this is true if the string is a far east string or some other wierd string */ private boolean extendedText; /** Number of formatting runs in this rich text field */ private short runCount; /** Number of characters in current string */ private int charCount; private int extensionLength; public SSTDeserializer( BinaryTree strings ) { this.strings = strings; initVars(); } private void initVars() { runCount = 0; continuationExpectedChars = 0; unfinishedString = ""; // bytesInCurrentSegment = 0; // stringDataOffset = 0; wideChar = false; richText = false; extendedText = false; } /** * This is the starting point where strings are constructed. Note that * strings may span across multiple continuations. Read the SST record * carefully before beginning to hack. */ public void manufactureStrings( final byte[] data, final int initialOffset, short dataSize ) { initVars(); int offset = initialOffset; while ( ( offset - initialOffset ) < dataSize ) { int remaining = dataSize - offset + initialOffset; if ( ( remaining > 0 ) && ( remaining < LittleEndianConsts.SHORT_SIZE ) ) { throw new RecordFormatException( "Cannot get length of the last string in SSTRecord" ); } if ( remaining == LittleEndianConsts.SHORT_SIZE ) { setContinuationExpectedChars( LittleEndian.getUShort( data, offset ) ); unfinishedString = ""; break; } charCount = LittleEndian.getUShort( data, offset ); readStringHeader( data, offset ); boolean stringContinuesOverContinuation = remaining < totalStringSize(); if ( stringContinuesOverContinuation ) { int remainingBytes = ( initialOffset + dataSize ) - offset - stringHeaderOverhead(); setContinuationExpectedChars( charCount - calculateCharCount( remainingBytes ) ); charCount -= getContinuationExpectedChars(); } else { setContinuationExpectedChars( 0 ); } processString( data, offset, charCount ); offset += totalStringSize(); if ( getContinuationExpectedChars() != 0 ) { break; } } } // private void dump( final byte[] data, int offset, int length ) // { // try // { // System.out.println( "------------------- SST DUMP -------------------------" ); // HexDump.dump( (byte[]) data, offset, System.out, offset, length ); // } // catch ( IOException e ) // { // } // catch ( ArrayIndexOutOfBoundsException e ) // { // } // catch ( IllegalArgumentException e ) // { // } // } /** * Detemines the option types for the string (ie, compressed or uncompressed unicode, rich text string or * plain string etc) and calculates the length and offset for the string. * */ private void readStringHeader( final byte[] data, final int index ) { byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE]; wideChar = ( optionFlag & 1 ) == 1; extendedText = ( optionFlag & 4 ) == 4; richText = ( optionFlag & 8 ) == 8; runCount = 0; if ( richText ) { runCount = LittleEndian.getShort( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD ); } extensionLength = 0; if ( extendedText ) { extensionLength = LittleEndian.getInt( data, index + SSTRecord.STRING_MINIMAL_OVERHEAD + (richText ? LittleEndianConsts.SHORT_SIZE : 0) ); } } /** * Reads a string or the first part of a string. * * @param characters the number of characters to write. * * @return the number of bytes written. */ private int processString( final byte[] data, final int dataIndex, final int characters ) { // length is the length we store it as. not the length that is read. int length = SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( characters ); byte[] unicodeStringBuffer = new byte[length]; int offset = 0; // Set the length in characters LittleEndian.putUShort( unicodeStringBuffer, offset, characters ); offset += LittleEndianConsts.SHORT_SIZE; // Set the option flags unicodeStringBuffer[offset] = data[dataIndex + offset]; // Copy in the string data int bytesRead = unicodeStringBuffer.length - SSTRecord.STRING_MINIMAL_OVERHEAD; arraycopy( data, dataIndex + stringHeaderOverhead(), unicodeStringBuffer, SSTRecord.STRING_MINIMAL_OVERHEAD, bytesRead ); // Create the unicode string UnicodeString string = new UnicodeString( UnicodeString.sid, (short) unicodeStringBuffer.length, unicodeStringBuffer ); if ( isStringFinished() ) { Integer integer = new Integer( strings.size() ); addToStringTable( strings, integer, string ); } else { unfinishedString = string.getString(); } return bytesRead; } private boolean isStringFinished() { return getContinuationExpectedChars() == 0; } /** * Okay, we are doing some major cheating here. Because we can't handle rich text strings properly * we end up getting duplicate strings. To get around this I'm doing two things: 1. Converting rich * text to normal text and 2. If there's a duplicate I'm adding a space onto the end. Sneaky perhaps * but it gets the job done until we can handle this a little better. */ static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string ) { if ( string.isRichText() ) string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) ); if ( string.isExtendedText() ) string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~4 ) ) ); boolean added = false; while ( added == false ) { try { strings.put( integer, string ); added = true; } catch ( Exception ignore ) { string.setString( string.getString() + " " ); } } } private int calculateCharCount( final int byte_count ) { return byte_count / ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE ); } /** * Process a Continue record. A Continue record for an SST record * contains the same kind of data that the SST record contains, * with the following exceptions: *

*

    *
  1. The string counts at the beginning of the SST record are * not in the Continue record *
  2. The first string in the Continue record might NOT begin * with a size. If the last string in the previous record is * continued in this record, the size is determined by that * last string in the previous record; the first string will * begin with a flag byte, followed by the remaining bytes (or * words) of the last string from the previous * record. Otherwise, the first string in the record will * begin with a string length *
* * @param record the Continue record's byte data */ public void processContinueRecord( final byte[] record ) { if ( isStringFinished() ) { initVars(); manufactureStrings( record, 0, (short) record.length ); } else { // reset the wide bit because that can change across a continuation. the fact that it's // actually rich text doesn't change across continuations even though the rich text // may on longer be set in the "new" option flag. confusing huh? wideChar = ( record[0] & 1 ) == 1; if ( stringSpansContinuation( record.length - LittleEndianConsts.BYTE_SIZE ) ) { processEntireContinuation( record ); } else { readStringRemainder( record ); } } } /** * Reads the remainder string and any subsequent strings from the continuation record. * * @param record The entire continuation record data. */ private void readStringRemainder( final byte[] record ) { int stringRemainderSizeInBytes = calculateByteCount( getContinuationExpectedChars() ); // stringDataOffset = LittleEndianConsts.BYTE_SIZE; byte[] unicodeStringData = new byte[SSTRecord.STRING_MINIMAL_OVERHEAD + calculateByteCount( getContinuationExpectedChars() )]; // write the string length LittleEndian.putShort( unicodeStringData, 0, (short) getContinuationExpectedChars() ); // write the options flag unicodeStringData[LittleEndianConsts.SHORT_SIZE] = createOptionByte( wideChar, richText, extendedText ); // copy the bytes/words making up the string; skipping // past all the overhead of the str_data array arraycopy( record, LittleEndianConsts.BYTE_SIZE, unicodeStringData, SSTRecord.STRING_MINIMAL_OVERHEAD, unicodeStringData.length - SSTRecord.STRING_MINIMAL_OVERHEAD ); // use special constructor to create the final string UnicodeString string = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData, unfinishedString ); Integer integer = new Integer( strings.size() ); addToStringTable( strings, integer, string ); int newOffset = offsetForContinuedRecord( stringRemainderSizeInBytes ); manufactureStrings( record, newOffset, (short) ( record.length - newOffset ) ); } /** * Calculates the size of the string in bytes based on the character width */ private int stringSizeInBytes() { return calculateByteCount( charCount ); } /** * Calculates the size of the string in byes. This figure includes all the over * heads for the string. */ private int totalStringSize() { return stringSizeInBytes() + stringHeaderOverhead() + LittleEndianConsts.INT_SIZE * runCount + extensionLength; } private int stringHeaderOverhead() { return SSTRecord.STRING_MINIMAL_OVERHEAD + ( richText ? LittleEndianConsts.SHORT_SIZE : 0 ) + ( extendedText ? LittleEndianConsts.INT_SIZE : 0 ); } private int offsetForContinuedRecord( int stringRemainderSizeInBytes ) { return stringRemainderSizeInBytes + LittleEndianConsts.BYTE_SIZE + runCount * LittleEndianConsts.INT_SIZE + extensionLength; } private byte createOptionByte( boolean wideChar, boolean richText, boolean farEast ) { return (byte) ( ( wideChar ? 1 : 0 ) + ( farEast ? 4 : 0 ) + ( richText ? 8 : 0 ) ); } /** * If the continued record is so long is spans into the next continue then * simply suck the remaining string data into the existing unfinishedString. * * @param record The data from the continuation record. */ private void processEntireContinuation( final byte[] record ) { // create artificial data to create a UnicodeString int dataLengthInBytes = record.length - LittleEndianConsts.BYTE_SIZE; byte[] unicodeStringData = new byte[record.length + LittleEndianConsts.SHORT_SIZE]; LittleEndian.putShort( unicodeStringData, (byte) 0, (short) calculateCharCount( dataLengthInBytes ) ); arraycopy( record, 0, unicodeStringData, LittleEndianConsts.SHORT_SIZE, record.length ); UnicodeString ucs = new UnicodeString( UnicodeString.sid, (short) unicodeStringData.length, unicodeStringData ); unfinishedString = unfinishedString + ucs.getString(); setContinuationExpectedChars( getContinuationExpectedChars() - calculateCharCount( dataLengthInBytes ) ); } private boolean stringSpansContinuation( int continuationSizeInBytes ) { return calculateByteCount( getContinuationExpectedChars() ) > continuationSizeInBytes; } /** * @return the number of characters we expect in the first * sub-record in a subsequent continuation record */ int getContinuationExpectedChars() { return continuationExpectedChars; } private void setContinuationExpectedChars( final int count ) { continuationExpectedChars = count; } private int calculateByteCount( final int character_count ) { return character_count * ( wideChar ? LittleEndianConsts.SHORT_SIZE : LittleEndianConsts.BYTE_SIZE ); } /** * Copies an array from the specified source array, beginning at the * specified position, to the specified position of the destination array. * A subsequence of array components are copied from the source * array referenced by src to the destination array * referenced by dst. The number of components copied is * equal to the length argument. The components at * positions srcOffset through * srcOffset+length-1 in the source array are copied into * positions dstOffset through * dstOffset+length-1, respectively, of the destination * array. *

* If the src and dst arguments refer to the * same array object, then the copying is performed as if the * components at positions srcOffset through * srcOffset+length-1 were first copied to a temporary * array with length components and then the contents of * the temporary array were copied into positions * dstOffset through dstOffset+length-1 of the * destination array. *

* If dst is null, then a * NullPointerException is thrown. *

* If src is null, then a * NullPointerException is thrown and the destination * array is not modified. *

* Otherwise, if any of the following is true, an * ArrayStoreException is thrown and the destination is * not modified: *

*

* Otherwise, if any of the following is true, an * IndexOutOfBoundsException is * thrown and the destination is not modified: *

*

* Otherwise, if any actual component of the source array from * position srcOffset through * srcOffset+length-1 cannot be converted to the component * type of the destination array by assignment conversion, an * ArrayStoreException is thrown. In this case, let * k be the smallest nonnegative integer less than * length such that src[srcOffset+k] * cannot be converted to the component type of the destination * array; when the exception is thrown, source array components from * positions srcOffset through * srcOffset+k-1 * will already have been copied to destination array positions * dstOffset through * dstOffset+k-1 and no other * positions of the destination array will have been modified. * (Because of the restrictions already itemized, this * paragraph effectively applies only to the situation where both * arrays have component types that are reference types.) * * @param src the source array. * @param src_position start position in the source array. * @param dst the destination array. * @param dst_position pos start position in the destination data. * @param length the number of array elements to be copied. * @exception IndexOutOfBoundsException if copying would cause * access of data outside array bounds. * @exception ArrayStoreException if an element in the src * array could not be stored into the dest array * because of a type mismatch. * @exception NullPointerException if either src or * dst is null. */ private void arraycopy( byte[] src, int src_position, byte[] dst, int dst_position, int length ) { System.arraycopy( src, src_position, dst, dst_position, length ); } /** * @return the unfinished string */ String getUnfinishedString() { return unfinishedString; } /** * @return true if current string uses wide characters */ boolean isWideChar() { return wideChar; } }