857 lines
28 KiB
Java
857 lines
28 KiB
Java
/* ====================================================================
|
|
* The Apache Software License, Version 1.1
|
|
*
|
|
* Copyright (c) 2002 The Apache Software Foundation. All rights
|
|
* reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* 3. The end-user documentation included with the redistribution,
|
|
* if any, must include the following acknowledgment:
|
|
* "This product includes software developed by the
|
|
* Apache Software Foundation (http://www.apache.org/)."
|
|
* Alternately, this acknowledgment may appear in the software itself,
|
|
* if and wherever such third-party acknowledgments normally appear.
|
|
*
|
|
* 4. The names "Apache" and "Apache Software Foundation" and
|
|
* "Apache POI" must not be used to endorse or promote products
|
|
* derived from this software without prior written permission. For
|
|
* written permission, please contact apache@apache.org.
|
|
*
|
|
* 5. Products derived from this software may not be called "Apache",
|
|
* "Apache POI", nor may "Apache" appear in their name, without
|
|
* prior written permission of the Apache Software Foundation.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
* ====================================================================
|
|
*
|
|
* This software consists of voluntary contributions made by many
|
|
* individuals on behalf of the Apache Software Foundation. For more
|
|
* information on the Apache Software Foundation, please see
|
|
* <http://www.apache.org/>.
|
|
*/
|
|
|
|
package org.apache.poi.hssf.record;
|
|
|
|
import org.apache.poi.util.BinaryTree;
|
|
import org.apache.poi.util.LittleEndian;
|
|
import org.apache.poi.util.LittleEndianConsts;
|
|
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
|
|
/**
|
|
* Title: Static String Table Record
|
|
* <P>
|
|
* Description: This holds all the strings for LabelSSTRecords.
|
|
* <P>
|
|
* REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
|
|
* 1-57231-498-2)
|
|
* <P>
|
|
* @author Andrew C. Oliver (acoliver at apache dot org)
|
|
* @author Marc Johnson (mjohnson at apache dot org)
|
|
* @author Glen Stampoultzis (glens at apache.org)
|
|
* @version 2.0-pre
|
|
* @see org.apache.poi.hssf.record.LabelSSTRecord
|
|
* @see org.apache.poi.hssf.record.ContinueRecord
|
|
*/
|
|
|
|
public class SSTRecord
|
|
extends Record
|
|
{
|
|
|
|
/** how big can an SST record be? As big as any record can be: 8228 bytes */
|
|
static final int MAX_RECORD_SIZE = 8228;
|
|
|
|
/** standard record overhead: two shorts (record id plus data space size)*/
|
|
static final int STD_RECORD_OVERHEAD =
|
|
2 * LittleEndianConsts.SHORT_SIZE;
|
|
|
|
/** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
|
|
static final int SST_RECORD_OVERHEAD =
|
|
( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
|
|
|
|
/** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
|
|
static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;
|
|
|
|
/** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
|
|
static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
|
|
|
|
public static final short sid = 0xfc;
|
|
|
|
/** union of strings in the SST and EXTSST */
|
|
private int field_1_num_strings;
|
|
|
|
/** according to docs ONLY SST */
|
|
private int field_2_num_unique_strings;
|
|
private BinaryTree field_3_strings;
|
|
|
|
/** this is the number of characters we expect in the first sub-record in a subsequent continuation record */
|
|
private int __expected_chars;
|
|
|
|
/** this is the string we were working on before hitting the end of the current record. This string is NOT finished. */
|
|
private String _unfinished_string;
|
|
|
|
/** this is the total length of the current string being handled */
|
|
private int _total_length_bytes;
|
|
|
|
/** this is the offset into a string field of the actual string data */
|
|
private int _string_data_offset;
|
|
|
|
/** this is true if the string uses wide characters */
|
|
private boolean _wide_char;
|
|
|
|
/** Record lengths for initial SST record and all continue records */
|
|
private List _record_lengths = null;
|
|
|
|
/**
|
|
* default constructor
|
|
*/
|
|
|
|
public SSTRecord()
|
|
{
|
|
field_1_num_strings = 0;
|
|
field_2_num_unique_strings = 0;
|
|
field_3_strings = new BinaryTree();
|
|
setExpectedChars( 0 );
|
|
_unfinished_string = "";
|
|
_total_length_bytes = 0;
|
|
_string_data_offset = 0;
|
|
_wide_char = false;
|
|
}
|
|
|
|
/**
|
|
* Constructs an SST record and sets its fields appropriately.
|
|
*
|
|
* @param id must be 0xfc or an exception will be throw upon
|
|
* validation
|
|
* @param size the size of the data area of the record
|
|
* @param data of the record (should not contain sid/len)
|
|
*/
|
|
|
|
public SSTRecord( final short id, final short size, final byte[] data )
|
|
{
|
|
super( id, size, data );
|
|
}
|
|
|
|
/**
|
|
* Constructs an SST record and sets its fields appropriately.
|
|
*
|
|
* @param id must be 0xfc or an exception will be throw upon
|
|
* validation
|
|
* @param size the size of the data area of the record
|
|
* @param data of the record (should not contain sid/len)
|
|
* @param offset of the record
|
|
*/
|
|
|
|
public SSTRecord( final short id, final short size, final byte[] data,
|
|
int offset )
|
|
{
|
|
super( id, size, data, offset );
|
|
}
|
|
|
|
/**
|
|
* Add a string. Determines whether 8-bit encoding can be used, or
|
|
* whether 16-bit encoding must be used.
|
|
* <p>
|
|
* THIS IS THE PREFERRED METHOD OF ADDING A STRING. IF YOU USE THE
|
|
* OTHER ,code>addString</code> METHOD AND FORCE 8-BIT ENCODING ON
|
|
* A STRING THAT SHOULD USE 16-BIT ENCODING, YOU WILL CORRUPT THE
|
|
* STRING; IF YOU USE THAT METHOD AND FORCE 16-BIT ENCODING, YOU
|
|
* ARE WASTING SPACE WHEN THE WORKBOOK IS WRITTEN OUT.
|
|
*
|
|
* @param string string to be added
|
|
*
|
|
* @return the index of that string in the table
|
|
*/
|
|
|
|
public int addString( final String string )
|
|
{
|
|
int rval;
|
|
|
|
if ( string == null )
|
|
{
|
|
rval = addString( "", false );
|
|
}
|
|
else
|
|
{
|
|
|
|
// scan for characters greater than 255 ... if any are
|
|
// present, we have to use 16-bit encoding. Otherwise, we
|
|
// can use 8-bit encoding
|
|
boolean useUTF16 = false;
|
|
int strlen = string.length();
|
|
|
|
for ( int j = 0; j < strlen; j++ )
|
|
{
|
|
if ( string.charAt( j ) > 255 )
|
|
{
|
|
useUTF16 = true;
|
|
break;
|
|
}
|
|
}
|
|
rval = addString( string, useUTF16 );
|
|
}
|
|
return rval;
|
|
}
|
|
|
|
/**
|
|
* Add a string and assert the encoding (8-bit or 16-bit) to be
|
|
* used.
|
|
* <P>
|
|
* USE THIS METHOD AT YOUR OWN RISK. IF YOU FORCE 8-BIT ENCODING,
|
|
* YOU MAY CORRUPT YOUR STRING. IF YOU FORCE 16-BIT ENCODING AND
|
|
* IT ISN'T NECESSARY, YOU WILL WASTE SPACE WHEN THIS RECORD IS
|
|
* WRITTEN OUT.
|
|
*
|
|
* @param string string to be added
|
|
* @param useUTF16 if true, forces 16-bit encoding. If false,
|
|
* forces 8-bit encoding
|
|
*
|
|
* @return the index of that string in the table
|
|
*/
|
|
|
|
public int addString( final String string, final boolean useUTF16 )
|
|
{
|
|
field_1_num_strings++;
|
|
String str = ( string == null ) ? ""
|
|
: string;
|
|
int rval = -1;
|
|
UnicodeString ucs = new UnicodeString();
|
|
|
|
ucs.setString( str );
|
|
ucs.setCharCount( (short) str.length() );
|
|
ucs.setOptionFlags( (byte) ( useUTF16 ? 1
|
|
: 0 ) );
|
|
Integer integer = (Integer) field_3_strings.getKeyForValue( ucs );
|
|
|
|
if ( integer != null )
|
|
{
|
|
rval = integer.intValue();
|
|
}
|
|
else
|
|
{
|
|
|
|
// This is a new string -- we didn't see it among the
|
|
// strings we've already collected
|
|
rval = field_3_strings.size();
|
|
field_2_num_unique_strings++;
|
|
integer = new Integer( rval );
|
|
addToStringTable( integer, ucs );
|
|
// field_3_strings.put( integer, ucs );
|
|
}
|
|
return rval;
|
|
}
|
|
|
|
/**
|
|
* @return number of strings
|
|
*/
|
|
|
|
public int getNumStrings()
|
|
{
|
|
return field_1_num_strings;
|
|
}
|
|
|
|
/**
|
|
* @return number of unique strings
|
|
*/
|
|
|
|
public int getNumUniqueStrings()
|
|
{
|
|
return field_2_num_unique_strings;
|
|
}
|
|
|
|
/**
|
|
* USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
|
|
* METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
|
|
* ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
|
|
* WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
|
|
* WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
|
|
*
|
|
* @param count number of strings
|
|
*
|
|
*/
|
|
|
|
public void setNumStrings( final int count )
|
|
{
|
|
field_1_num_strings = count;
|
|
}
|
|
|
|
/**
|
|
* USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
|
|
* METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
|
|
* EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
|
|
* IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
|
|
* THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
|
|
* THE RECORD
|
|
*
|
|
* @param count number of strings
|
|
*/
|
|
|
|
public void getNumUniqueStrings( final int count )
|
|
{
|
|
field_2_num_unique_strings = count;
|
|
}
|
|
|
|
/**
|
|
* Get a particular string by its index
|
|
*
|
|
* @param id index into the array of strings
|
|
*
|
|
* @return the desired string
|
|
*/
|
|
|
|
public String getString( final int id )
|
|
{
|
|
return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) ).getString();
|
|
}
|
|
|
|
public boolean isString16bit( final int id )
|
|
{
|
|
UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer( id ) ) );
|
|
return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
|
|
}
|
|
|
|
/**
|
|
* Return a debugging string representation
|
|
*
|
|
* @return string representation
|
|
*/
|
|
|
|
public String toString()
|
|
{
|
|
StringBuffer buffer = new StringBuffer();
|
|
|
|
buffer.append( "[SST]\n" );
|
|
buffer.append( " .numstrings = " )
|
|
.append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
|
|
buffer.append( " .uniquestrings = " )
|
|
.append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
|
|
for ( int k = 0; k < field_3_strings.size(); k++ )
|
|
{
|
|
buffer.append( " .string_" + k + " = " )
|
|
.append( ( (UnicodeString) field_3_strings
|
|
.get( new Integer( k ) ) ).toString() ).append( "\n" );
|
|
}
|
|
buffer.append( "[/SST]\n" );
|
|
return buffer.toString();
|
|
}
|
|
|
|
|
|
/**
|
|
* Process a Continue record. A Continue record for an SST record
|
|
* contains the same kind of data that the SST record contains,
|
|
* with the following exceptions:
|
|
* <P>
|
|
* <OL>
|
|
* <LI>The string counts at the beginning of the SST record are
|
|
* not in the Continue record
|
|
* <LI>The first string in the Continue record might NOT begin
|
|
* with a size. If the last string in the previous record is
|
|
* continued in this record, the size is determined by that
|
|
* last string in the previous record; the first string will
|
|
* begin with a flag byte, followed by the remaining bytes (or
|
|
* words) of the last string from the previous
|
|
* record. Otherwise, the first string in the record will
|
|
* begin with a string length
|
|
* </OL>
|
|
*
|
|
* @param record the Continue record's byte data
|
|
*/
|
|
|
|
public void processContinueRecord( final byte[] record )
|
|
{
|
|
if ( getExpectedChars() == 0 )
|
|
{
|
|
_unfinished_string = "";
|
|
_total_length_bytes = 0;
|
|
_string_data_offset = 0;
|
|
_wide_char = false;
|
|
manufactureStrings( record, 0, (short) record.length );
|
|
}
|
|
else
|
|
{
|
|
int data_length = record.length - LittleEndianConsts.BYTE_SIZE;
|
|
|
|
if ( calculateByteCount( getExpectedChars() ) > data_length )
|
|
{
|
|
|
|
// create artificial data to create a UnicodeString
|
|
byte[] input =
|
|
new byte[record.length + LittleEndianConsts.SHORT_SIZE];
|
|
short size = (short) ( ( ( record[0] & 1 ) == 1 )
|
|
? ( data_length
|
|
/ LittleEndianConsts.SHORT_SIZE )
|
|
: ( data_length
|
|
/ LittleEndianConsts.BYTE_SIZE ) );
|
|
|
|
LittleEndian.putShort( input, (byte) 0, size );
|
|
System.arraycopy( record, 0, input,
|
|
LittleEndianConsts.SHORT_SIZE,
|
|
record.length );
|
|
UnicodeString ucs = new UnicodeString( UnicodeString.sid,
|
|
(short) input.length,
|
|
input );
|
|
|
|
_unfinished_string = _unfinished_string + ucs.getString();
|
|
setExpectedChars( getExpectedChars() - size );
|
|
}
|
|
else
|
|
{
|
|
setupStringParameters( record, -LittleEndianConsts.SHORT_SIZE,
|
|
getExpectedChars() );
|
|
byte[] str_data = new byte[_total_length_bytes];
|
|
int length = STRING_MINIMAL_OVERHEAD
|
|
+ ( calculateByteCount( getExpectedChars() ) );
|
|
byte[] bstring = new byte[length];
|
|
|
|
// Copy data from the record into the string
|
|
// buffer. Copy skips the length of a short in the
|
|
// string buffer, to leave room for the string length.
|
|
System.arraycopy( record, 0, str_data,
|
|
LittleEndianConsts.SHORT_SIZE,
|
|
str_data.length
|
|
- LittleEndianConsts.SHORT_SIZE );
|
|
|
|
// write the string length
|
|
LittleEndian.putShort( bstring, 0,
|
|
(short) getExpectedChars() );
|
|
|
|
// write the options flag
|
|
bstring[LittleEndianConsts.SHORT_SIZE] =
|
|
str_data[LittleEndianConsts.SHORT_SIZE];
|
|
|
|
// copy the bytes/words making up the string; skipping
|
|
// past all the overhead of the str_data array
|
|
System.arraycopy( str_data, _string_data_offset, bstring,
|
|
STRING_MINIMAL_OVERHEAD,
|
|
bstring.length - STRING_MINIMAL_OVERHEAD );
|
|
|
|
// use special constructor to create the final string
|
|
UnicodeString string =
|
|
new UnicodeString( UnicodeString.sid,
|
|
(short) bstring.length, bstring,
|
|
_unfinished_string );
|
|
Integer integer = new Integer( field_3_strings.size() );
|
|
|
|
// field_3_strings.put( integer, string );
|
|
addToStringTable( integer, string );
|
|
manufactureStrings( record,
|
|
_total_length_bytes
|
|
- LittleEndianConsts
|
|
.SHORT_SIZE, (short) record.length );
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @return sid
|
|
*/
|
|
|
|
public short getSid()
|
|
{
|
|
return sid;
|
|
}
|
|
|
|
/**
|
|
* @return hashcode
|
|
*/
|
|
|
|
public int hashCode()
|
|
{
|
|
return field_2_num_unique_strings;
|
|
}
|
|
|
|
public boolean equals( Object o )
|
|
{
|
|
if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
|
|
{
|
|
return false;
|
|
}
|
|
SSTRecord other = (SSTRecord) o;
|
|
|
|
return ( ( field_1_num_strings == other
|
|
.field_1_num_strings ) && ( field_2_num_unique_strings == other
|
|
.field_2_num_unique_strings ) && field_3_strings
|
|
.equals( other.field_3_strings ) );
|
|
}
|
|
|
|
/**
|
|
* validate SID
|
|
*
|
|
* @param id the alleged SID
|
|
*
|
|
* @exception RecordFormatException if validation fails
|
|
*/
|
|
|
|
protected void validateSid( final short id )
|
|
throws RecordFormatException
|
|
{
|
|
if ( id != sid )
|
|
{
|
|
throw new RecordFormatException( "NOT An SST RECORD" );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Fill the fields from the data
|
|
* <P>
|
|
* The data consists of sets of string data. This string data is
|
|
* arranged as follows:
|
|
* <P>
|
|
* <CODE>
|
|
* short string_length; // length of string data
|
|
* byte string_flag; // flag specifying special string
|
|
* // handling
|
|
* short run_count; // optional count of formatting runs
|
|
* int extend_length; // optional extension length
|
|
* char[] string_data; // string data, can be byte[] or
|
|
* // short[] (length of array is
|
|
* // string_length)
|
|
* int[] formatting_runs; // optional formatting runs (length of
|
|
* // array is run_count)
|
|
* byte[] extension; // optional extension (length of array
|
|
* // is extend_length)
|
|
* </CODE>
|
|
* <P>
|
|
* The string_flag is bit mapped as follows:
|
|
* <P>
|
|
* <TABLE>
|
|
* <TR>
|
|
* <TH>Bit number</TH>
|
|
* <TH>Meaning if 0</TH>
|
|
* <TH>Meaning if 1</TH>
|
|
* <TR>
|
|
* <TR>
|
|
* <TD>0</TD>
|
|
* <TD>string_data is byte[]</TD>
|
|
* <TD>string_data is short[]</TH>
|
|
* <TR>
|
|
* <TR>
|
|
* <TD>1</TD>
|
|
* <TD>Should always be 0</TD>
|
|
* <TD>string_flag is defective</TH>
|
|
* <TR>
|
|
* <TR>
|
|
* <TD>2</TD>
|
|
* <TD>extension is not included</TD>
|
|
* <TD>extension is included</TH>
|
|
* <TR>
|
|
* <TR>
|
|
* <TD>3</TD>
|
|
* <TD>formatting run data is not included</TD>
|
|
* <TD>formatting run data is included</TH>
|
|
* <TR>
|
|
* <TR>
|
|
* <TD>4</TD>
|
|
* <TD>Should always be 0</TD>
|
|
* <TD>string_flag is defective</TH>
|
|
* <TR>
|
|
* <TR>
|
|
* <TD>5</TD>
|
|
* <TD>Should always be 0</TD>
|
|
* <TD>string_flag is defective</TH>
|
|
* <TR>
|
|
* <TR>
|
|
* <TD>6</TD>
|
|
* <TD>Should always be 0</TD>
|
|
* <TD>string_flag is defective</TH>
|
|
* <TR>
|
|
* <TR>
|
|
* <TD>7</TD>
|
|
* <TD>Should always be 0</TD>
|
|
* <TD>string_flag is defective</TH>
|
|
* <TR>
|
|
* </TABLE>
|
|
* <P>
|
|
* We can handle eating the overhead associated with bits 2 or 3
|
|
* (or both) being set, but we have no idea what to do with the
|
|
* associated data. The UnicodeString class can handle the byte[]
|
|
* vs short[] nature of the actual string data
|
|
*
|
|
* @param data raw data
|
|
* @param size size of the raw data
|
|
*/
|
|
|
|
protected void fillFields( final byte[] data, final short size,
|
|
int offset )
|
|
{
|
|
|
|
// this method is ALWAYS called after construction -- using
|
|
// the nontrivial constructor, of course -- so this is where
|
|
// we initialize our fields
|
|
field_1_num_strings = LittleEndian.getInt( data, 0 + offset );
|
|
field_2_num_unique_strings = LittleEndian.getInt( data, 4 + offset );
|
|
field_3_strings = new BinaryTree();
|
|
setExpectedChars( 0 );
|
|
_unfinished_string = "";
|
|
_total_length_bytes = 0;
|
|
_string_data_offset = 0;
|
|
_wide_char = false;
|
|
manufactureStrings( data, 8 + offset, size );
|
|
}
|
|
|
|
/**
|
|
* @return the number of characters we expect in the first
|
|
* sub-record in a subsequent continuation record
|
|
*/
|
|
|
|
int getExpectedChars()
|
|
{
|
|
return __expected_chars;
|
|
}
|
|
|
|
/**
|
|
* @return an iterator of the strings we hold. All instances are
|
|
* UnicodeStrings
|
|
*/
|
|
|
|
Iterator getStrings()
|
|
{
|
|
return field_3_strings.values().iterator();
|
|
}
|
|
|
|
/**
|
|
* @return count of the strings we hold.
|
|
*/
|
|
|
|
int countStrings()
|
|
{
|
|
return field_3_strings.size();
|
|
}
|
|
|
|
/**
|
|
* @return the unfinished string
|
|
*/
|
|
|
|
String getUnfinishedString()
|
|
{
|
|
return _unfinished_string;
|
|
}
|
|
|
|
/**
|
|
* @return the total length of the current string
|
|
*/
|
|
|
|
int getTotalLength()
|
|
{
|
|
return _total_length_bytes;
|
|
}
|
|
|
|
/**
|
|
* @return offset into current string data
|
|
*/
|
|
|
|
int getStringDataOffset()
|
|
{
|
|
return _string_data_offset;
|
|
}
|
|
|
|
/**
|
|
* @return true if current string uses wide characters
|
|
*/
|
|
|
|
boolean isWideChar()
|
|
{
|
|
return _wide_char;
|
|
}
|
|
|
|
|
|
private void manufactureStrings( final byte[] data, final int index,
|
|
short size )
|
|
{
|
|
int offset = index;
|
|
|
|
while ( offset < size )
|
|
{
|
|
int remaining = size - offset;
|
|
|
|
if ( ( remaining > 0 )
|
|
&& ( remaining < LittleEndianConsts.SHORT_SIZE ) )
|
|
{
|
|
throw new RecordFormatException(
|
|
"Cannot get length of the last string in SSTRecord" );
|
|
}
|
|
if ( remaining == LittleEndianConsts.SHORT_SIZE )
|
|
{
|
|
setExpectedChars( LittleEndian.getShort( data, offset ) );
|
|
_unfinished_string = "";
|
|
break;
|
|
}
|
|
short char_count = LittleEndian.getShort( data, offset );
|
|
|
|
setupStringParameters( data, offset, char_count );
|
|
if ( remaining < _total_length_bytes )
|
|
{
|
|
setExpectedChars( calculateCharCount( _total_length_bytes
|
|
- remaining ) );
|
|
char_count -= getExpectedChars();
|
|
_total_length_bytes = remaining;
|
|
}
|
|
else
|
|
{
|
|
setExpectedChars( 0 );
|
|
}
|
|
processString( data, offset, char_count );
|
|
offset += _total_length_bytes;
|
|
if ( getExpectedChars() != 0 )
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
private void setupStringParameters( final byte[] data, final int index,
|
|
final int char_count )
|
|
{
|
|
byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
|
|
|
|
_wide_char = ( optionFlag & 1 ) == 1;
|
|
boolean extended = ( optionFlag & 4 ) == 4;
|
|
boolean rich_text = ( optionFlag & 8 ) == 8;
|
|
|
|
_total_length_bytes = STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
|
|
_string_data_offset = STRING_MINIMAL_OVERHEAD;
|
|
if ( rich_text )
|
|
{
|
|
short run_count = LittleEndian.getShort( data, index + _string_data_offset );
|
|
|
|
_string_data_offset += LittleEndianConsts.SHORT_SIZE;
|
|
_total_length_bytes += LittleEndianConsts.SHORT_SIZE + ( LittleEndianConsts.INT_SIZE * run_count );
|
|
}
|
|
if ( extended )
|
|
{
|
|
int extension_length = LittleEndian.getInt( data, index + _string_data_offset );
|
|
|
|
_string_data_offset += LittleEndianConsts.INT_SIZE;
|
|
_total_length_bytes += LittleEndianConsts.INT_SIZE + extension_length;
|
|
}
|
|
}
|
|
|
|
private void processString( final byte[] data, final int index,
|
|
final short char_count )
|
|
{
|
|
byte[] str_data = new byte[_total_length_bytes];
|
|
int length = STRING_MINIMAL_OVERHEAD
|
|
+ calculateByteCount( char_count );
|
|
byte[] bstring = new byte[length];
|
|
|
|
System.arraycopy( data, index, str_data, 0, str_data.length );
|
|
int offset = 0;
|
|
|
|
LittleEndian.putShort( bstring, offset, char_count );
|
|
offset += LittleEndianConsts.SHORT_SIZE;
|
|
bstring[offset] = str_data[offset];
|
|
System.out.println( "_string_data_offset = " + _string_data_offset );
|
|
System.arraycopy( str_data, _string_data_offset, bstring,
|
|
STRING_MINIMAL_OVERHEAD,
|
|
bstring.length - STRING_MINIMAL_OVERHEAD );
|
|
UnicodeString string = new UnicodeString( UnicodeString.sid,
|
|
(short) bstring.length,
|
|
bstring );
|
|
|
|
if ( getExpectedChars() != 0 )
|
|
{
|
|
_unfinished_string = string.getString();
|
|
}
|
|
else
|
|
{
|
|
Integer integer = new Integer( field_3_strings.size() );
|
|
addToStringTable( integer, string );
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
|
|
* we end up getting duplicate strings. To get around this I'm doing do things: 1. Converting rich
|
|
* text to normal text and 2. If there's a duplicate I'm adding a space onto the end. Sneaky perhaps
|
|
* but it gets the job done until we can handle this a little better.
|
|
*/
|
|
private void addToStringTable( Integer integer, UnicodeString string )
|
|
{
|
|
if (string.isRichText())
|
|
string.setOptionFlags( (byte)(string.getOptionFlags() & (~8) ) );
|
|
|
|
boolean added = false;
|
|
while (added == false)
|
|
{
|
|
try
|
|
{
|
|
field_3_strings.put( integer, string );
|
|
added = true;
|
|
}
|
|
catch( Exception ignore )
|
|
{
|
|
string.setString( string.getString() + " " );
|
|
}
|
|
}
|
|
}
|
|
|
|
private void setExpectedChars( final int count )
|
|
{
|
|
__expected_chars = count;
|
|
}
|
|
|
|
private int calculateByteCount( final int character_count )
|
|
{
|
|
return character_count * ( _wide_char ? LittleEndianConsts.SHORT_SIZE
|
|
: LittleEndianConsts.BYTE_SIZE );
|
|
}
|
|
|
|
private int calculateCharCount( final int byte_count )
|
|
{
|
|
return byte_count / ( _wide_char ? LittleEndianConsts.SHORT_SIZE
|
|
: LittleEndianConsts.BYTE_SIZE );
|
|
}
|
|
|
|
/**
|
|
* called by the class that is responsible for writing this sucker.
|
|
* Subclasses should implement this so that their data is passed back in a
|
|
* byte array.
|
|
*
|
|
* @return byte array containing instance data
|
|
*/
|
|
|
|
public int serialize( int offset, byte[] data )
|
|
{
|
|
SSTSerializer serializer = new SSTSerializer(
|
|
_record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
|
|
return serializer.serialize( offset, data );
|
|
}
|
|
|
|
|
|
// we can probably simplify this later...this calculates the size
|
|
// w/o serializing but still is a bit slow
|
|
public int getRecordSize()
|
|
{
|
|
SSTSerializer serializer = new SSTSerializer(
|
|
_record_lengths, field_3_strings, getNumStrings(), getNumUniqueStrings() );
|
|
|
|
return serializer.getRecordSize();
|
|
}
|
|
|
|
}
|
|
|
|
|