ExtSST serialization patch to fix corruption when there is a large number of strings in the SST record.

It seems that only 128 buckets can be serialized. This patch addresses this Excel oddity.


git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/branches/REL_2_BRANCH@353372 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jason Height 2003-09-25 08:08:05 +00:00
parent 70a2698295
commit 1afc8c1cb4
4 changed files with 38 additions and 15 deletions

View File

@ -73,6 +73,7 @@ import org.apache.poi.util.LittleEndian;
public class ExtSSTInfoSubRecord public class ExtSSTInfoSubRecord
extends Record extends Record
{ {
public static final int INFO_SIZE = 8;
public final static short sid = public final static short sid =
0xFFF; // only here for conformance, doesn't really have an sid 0xFFF; // only here for conformance, doesn't really have an sid
private int field_1_stream_pos; // stream pointer to the SST record private int field_1_stream_pos; // stream pointer to the SST record

View File

@ -76,6 +76,9 @@ public class ExtSSTRecord
extends Record extends Record
{ {
public static final int DEFAULT_BUCKET_SIZE = 8; public static final int DEFAULT_BUCKET_SIZE = 8;
//Can't seem to find this documented, but from the biffviewer it is clear that
//Excel only records the indexes for the first 128 buckets.
public static final int MAX_BUCKETS = 128;
public final static short sid = 0xff; public final static short sid = 0xff;
private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE; private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE;
private ArrayList field_2_sst_info; private ArrayList field_2_sst_info;
@ -202,11 +205,28 @@ public class ExtSSTRecord
return pos; return pos;
} }
/** Returns the size of this record */
public int getRecordSize() public int getRecordSize()
{ {
return 6 + 8*getNumInfoRecords(); return 6 + 8*getNumInfoRecords();
} }
/**
 * Returns how many info records (buckets) are needed to index the given
 * number of strings.
 *
 * One bucket covers DEFAULT_BUCKET_SIZE strings; a partially filled trailing
 * bucket still counts as a whole one. The result is capped at MAX_BUCKETS
 * because Excel seems to stop recording info records beyond that point
 * (this does not appear to be documented anywhere).
 *
 * @param numStrings the number of strings in the SST
 * @return the number of info records, never more than MAX_BUCKETS
 */
public static final int getNumberOfInfoRecsForStrings(int numStrings) {
    // Full buckets first, then one extra bucket for any leftover strings.
    int bucketCount = numStrings / DEFAULT_BUCKET_SIZE;
    if (numStrings % DEFAULT_BUCKET_SIZE != 0) {
        bucketCount++;
    }
    // Excel seems to max out at MAX_BUCKETS info records.
    return Math.min(bucketCount, MAX_BUCKETS);
}
/**
 * Computes the size of the ExtSST record for an SST holding the given
 * number of strings.
 *
 * @param numStrings the number of strings in the SST
 * @return the size of the extsst record
 */
public static final int getRecordSizeForStrings(int numStrings) {
    final int infoRecordCount = getNumberOfInfoRecsForStrings(numStrings);
    // Fixed 4 + 2 bytes (presumably the record header plus the
    // strings-per-bucket field; confirm against the record layout),
    // followed by 8 bytes for every info record.
    final int fixedPart = 4 + 2;
    return fixedPart + (infoRecordCount * 8);
}
public short getSid() public short getSid()
{ {
return sid; return sid;

View File

@ -586,10 +586,7 @@ public class SSTRecord
*/ */
public int calcExtSSTRecordSize() public int calcExtSSTRecordSize()
{ {
int infoRecs = (field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE); return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
if ((field_3_strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
infoRecs ++;
return 4 + 2 + (infoRecs * 8);
} }
} }

View File

@ -82,8 +82,6 @@ class SSTSerializer
/** Offsets relative the start of the current SST or continue record */ /** Offsets relative the start of the current SST or continue record */
int[] bucketRelativeOffsets; int[] bucketRelativeOffsets;
int startOfSST, startOfRecord; int startOfSST, startOfRecord;
/** The default bucket size (this is used for ExternSST) */
final static int DEFAULT_BUCKET_SIZE = 8;
public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings ) public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings )
{ {
@ -93,9 +91,7 @@ class SSTSerializer
this.numUniqueStrings = numUniqueStrings; this.numUniqueStrings = numUniqueStrings;
this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings ); this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
int infoRecs = (strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE); int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size());
if ((strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
infoRecs ++;
this.bucketAbsoluteOffsets = new int[infoRecs]; this.bucketAbsoluteOffsets = new int[infoRecs];
this.bucketRelativeOffsets = new int[infoRecs]; this.bucketRelativeOffsets = new int[infoRecs];
} }
@ -157,10 +153,14 @@ class SSTSerializer
for ( int k = 0; k < strings.size(); k++ ) for ( int k = 0; k < strings.size(); k++ )
{ {
if (k % DEFAULT_BUCKET_SIZE == 0) if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
{ {
bucketAbsoluteOffsets[k / DEFAULT_BUCKET_SIZE] = pos; int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE;
bucketRelativeOffsets[k / DEFAULT_BUCKET_SIZE] = pos; if (index < ExtSSTRecord.MAX_BUCKETS) {
//Excel only indexes the first 128 buckets.
bucketAbsoluteOffsets[index] = pos;
bucketRelativeOffsets[index] = pos;
}
} }
System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() ); System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() );
pos += getUnicodeString( k ).getRecordSize(); pos += getUnicodeString( k ).getRecordSize();
@ -210,10 +210,15 @@ class SSTSerializer
{ {
UnicodeString unistr = getUnicodeString( stringIndex ); UnicodeString unistr = getUnicodeString( stringIndex );
if (stringIndex % DEFAULT_BUCKET_SIZE == 0) if (stringIndex % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
{ {
bucketAbsoluteOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfSST; int index = stringIndex / ExtSSTRecord.DEFAULT_BUCKET_SIZE;
bucketRelativeOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfRecord; if (index < ExtSSTRecord.MAX_BUCKETS) {
bucketAbsoluteOffsets[index] = offset + totalWritten +
recordProcessor.getRecordOffset() - startOfSST;
bucketRelativeOffsets[index] = offset + totalWritten +
recordProcessor.getRecordOffset() - startOfRecord;
}
} }
if ( unistr.getRecordSize() <= recordProcessor.getAvailable() ) if ( unistr.getRecordSize() <= recordProcessor.getAvailable() )