/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache POI" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache POI", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
*
* Description: This holds all the strings for LabelSSTRecords. *
* REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN: * 1-57231-498-2) *
* @author Andrew C. Oliver (acoliver at apache dot org) * @author Marc Johnson (mjohnson at apache dot org) * @author Glen Stampoultzis (glens at apache.org) * @version 2.0-pre * @see org.apache.poi.hssf.record.LabelSSTRecord * @see org.apache.poi.hssf.record.ContinueRecord */ public class SSTRecord extends Record { /** how big can an SST record be? As big as any record can be: 8228 bytes */ static final int MAX_RECORD_SIZE = 8228; /** standard record overhead: two shorts (record id plus data space size)*/ static final int STD_RECORD_OVERHEAD = 2 * LittleEndianConsts.SHORT_SIZE; /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */ static final int SST_RECORD_OVERHEAD = ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) ); /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */ static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD; /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */ static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE; public static final short sid = 0xfc; /** union of strings in the SST and EXTSST */ private int field_1_num_strings; /** according to docs ONLY SST */ private int field_2_num_unique_strings; private BinaryTree field_3_strings; /** Record lengths for initial SST record and all continue records */ private List _record_lengths = null; private SSTDeserializer deserializer; /** * default constructor */ public SSTRecord() { field_1_num_strings = 0; field_2_num_unique_strings = 0; field_3_strings = new BinaryTree(); deserializer = new SSTDeserializer(field_3_strings); } /** * Constructs an SST record and sets its fields appropriately. * * @param id must be 0xfc or an exception will be throw upon * validation * @param size the size of the data area of the record * @param data of the record (should not contain sid/len) */ public SSTRecord( final short id, final short size, final byte[] data ) { super( id, size, data ); } /** * Constructs an SST record and sets its fields appropriately. * * @param id must be 0xfc or an exception will be throw upon * validation * @param size the size of the data area of the record * @param data of the record (should not contain sid/len) * @param offset of the record */ public SSTRecord( final short id, final short size, final byte[] data, int offset ) { super( id, size, data, offset ); } /** * Add a string. Determines whether 8-bit encoding can be used, or * whether 16-bit encoding must be used. *
* THIS IS THE PREFERRED METHOD OF ADDING A STRING. IF YOU USE THE * OTHER ,code>addString METHOD AND FORCE 8-BIT ENCODING ON * A STRING THAT SHOULD USE 16-BIT ENCODING, YOU WILL CORRUPT THE * STRING; IF YOU USE THAT METHOD AND FORCE 16-BIT ENCODING, YOU * ARE WASTING SPACE WHEN THE WORKBOOK IS WRITTEN OUT. * * @param string string to be added * * @return the index of that string in the table */ public int addString( final String string ) { int rval; if ( string == null ) { rval = addString( "", false ); } else { // scan for characters greater than 255 ... if any are // present, we have to use 16-bit encoding. Otherwise, we // can use 8-bit encoding boolean useUTF16 = false; int strlen = string.length(); for ( int j = 0; j < strlen; j++ ) { if ( string.charAt( j ) > 255 ) { useUTF16 = true; break; } } rval = addString( string, useUTF16 ); } return rval; } /** * Add a string and assert the encoding (8-bit or 16-bit) to be * used. *
* USE THIS METHOD AT YOUR OWN RISK. IF YOU FORCE 8-BIT ENCODING,
* YOU MAY CORRUPT YOUR STRING. IF YOU FORCE 16-BIT ENCODING AND
* IT ISN'T NECESSARY, YOU WILL WASTE SPACE WHEN THIS RECORD IS
* WRITTEN OUT.
*
* @param string string to be added
* @param useUTF16 if true, forces 16-bit encoding. If false,
* forces 8-bit encoding
*
* @return the index of that string in the table
*/
public int addString( final String string, final boolean useUTF16 )
{
field_1_num_strings++;
String str = ( string == null ) ? ""
: string;
int rval = -1;
UnicodeString ucs = new UnicodeString();
ucs.setString( str );
ucs.setCharCount( (short) str.length() );
ucs.setOptionFlags( (byte) ( useUTF16 ? 1
: 0 ) );
Integer integer = (Integer) field_3_strings.getKeyForValue( ucs );
if ( integer != null )
{
rval = integer.intValue();
}
else
{
// This is a new string -- we didn't see it among the
// strings we've already collected
rval = field_3_strings.size();
field_2_num_unique_strings++;
integer = new Integer( rval );
SSTDeserializer.addToStringTable( field_3_strings, integer, ucs );
// field_3_strings.put( integer, ucs );
}
return rval;
}
/**
* @return number of strings
*/
public int getNumStrings()
{
return field_1_num_strings;
}
/**
* @return number of unique strings
*/
public int getNumUniqueStrings()
{
return field_2_num_unique_strings;
}
/**
* USE THIS METHOD AT YOUR OWN PERIL: THE addString
* METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
* ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
* WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
* WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
*
* @param count number of strings
*
*/
public void setNumStrings( final int count )
{
field_1_num_strings = count;
}
/**
* USE THIS METHOD AT YOUR OWN PERIL: THE addString
* METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
* EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
* IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
* THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
* THE RECORD
*
* @param count number of strings
*/
public void getNumUniqueStrings( final int count )
{
field_2_num_unique_strings = count;
}
/**
* Get a particular string by its index
*
* @param id index into the array of strings
*
* @return the desired string
*/
public String getString( final int id )
{
return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) ).getString();
}
public boolean isString16bit( final int id )
{
UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer( id ) ) );
return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
}
/**
* Return a debugging string representation
*
* @return string representation
*/
public String toString()
{
StringBuffer buffer = new StringBuffer();
buffer.append( "[SST]\n" );
buffer.append( " .numstrings = " )
.append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
buffer.append( " .uniquestrings = " )
.append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
for ( int k = 0; k < field_3_strings.size(); k++ )
{
buffer.append( " .string_" + k + " = " )
.append( ( (UnicodeString) field_3_strings
.get( new Integer( k ) ) ).toString() ).append( "\n" );
}
buffer.append( "[/SST]\n" );
return buffer.toString();
}
/**
* @return sid
*/
public short getSid()
{
return sid;
}
/**
* @return hashcode
*/
public int hashCode()
{
return field_2_num_unique_strings;
}
public boolean equals( Object o )
{
if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
{
return false;
}
SSTRecord other = (SSTRecord) o;
return ( ( field_1_num_strings == other
.field_1_num_strings ) && ( field_2_num_unique_strings == other
.field_2_num_unique_strings ) && field_3_strings
.equals( other.field_3_strings ) );
}
/**
* validate SID
*
* @param id the alleged SID
*
* @exception RecordFormatException if validation fails
*/
protected void validateSid( final short id )
throws RecordFormatException
{
if ( id != sid )
{
throw new RecordFormatException( "NOT An SST RECORD" );
}
}
/**
* Fill the fields from the data
*
* The data consists of sets of string data. This string data is * arranged as follows: *
*
* short string_length; // length of string data
* byte string_flag; // flag specifying special string
* // handling
* short run_count; // optional count of formatting runs
* int extend_length; // optional extension length
* char[] string_data; // string data, can be byte[] or
* // short[] (length of array is
* // string_length)
* int[] formatting_runs; // optional formatting runs (length of
* // array is run_count)
* byte[] extension; // optional extension (length of array
* // is extend_length)
*
*
* The string_flag is bit mapped as follows: *
*
Bit number | *Meaning if 0 | *Meaning if 1 | *
---|---|---|
0 | *string_data is byte[] | *string_data is short[] * |
1 | *Should always be 0 | *string_flag is defective * |
2 | *extension is not included | *extension is included * |
3 | *formatting run data is not included | *formatting run data is included * |
4 | *Should always be 0 | *string_flag is defective * |
5 | *Should always be 0 | *string_flag is defective * |
6 | *Should always be 0 | *string_flag is defective * |
7 | *Should always be 0 | *string_flag is defective * |
* We can handle eating the overhead associated with bits 2 or 3 * (or both) being set, but we have no idea what to do with the * associated data. The UnicodeString class can handle the byte[] * vs short[] nature of the actual string data * * @param data raw data * @param size size of the raw data */ protected void fillFields( final byte[] data, final short size, int offset ) { // this method is ALWAYS called after construction -- using // the nontrivial constructor, of course -- so this is where // we initialize our fields field_1_num_strings = LittleEndian.getInt( data, 0 + offset ); field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset ); field_3_strings = new BinaryTree(); deserializer = new SSTDeserializer(field_3_strings); deserializer.manufactureStrings( data, 8 + offset, (short)(size - 8) ); } /** * @return an iterator of the strings we hold. All instances are * UnicodeStrings */ Iterator getStrings() { return field_3_strings.values().iterator(); } /** * @return count of the strings we hold. */ int countStrings() { return field_3_strings.size(); } /** * called by the class that is responsible for writing this sucker. * Subclasses should implement this so that their data is passed back in a * byte array. * * @return byte array containing instance data */ public int serialize( int offset, byte[] data ) { SSTSerializer serializer = new SSTSerializer( _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() ); return serializer.serialize( offset, data ); } // we can probably simplify this later...this calculates the size // w/o serializing but still is a bit slow public int getRecordSize() { SSTSerializer serializer = new SSTSerializer( _record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() ); return serializer.getRecordSize(); } SSTDeserializer getDeserializer() { return deserializer; } /** * Strange to handle continue records this way. Is it a smell? */ public void processContinueRecord( byte[] record ) { deserializer.processContinueRecord( record ); } }