From e418e8688e285374ccf155b8afbec6bc63e9cb24 Mon Sep 17 00:00:00 2001 From: Jason Height Date: Thu, 18 Sep 2003 02:10:50 +0000 Subject: [PATCH] Fixed ExtSST serialization (length not calculated correctly) Implemented DBCellRecord and IndexRecord serialization Can now import into MS Access. Cool! Both of the above fixes were required to make this work. git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353356 13f79535-47bb-0310-9956-ffa450edef68 --- src/java/org/apache/poi/hssf/model/Sheet.java | 265 ++++++------------ .../apache/poi/hssf/record/DBCellRecord.java | 13 +- .../poi/hssf/record/ExtSSTInfoSubRecord.java | 2 +- .../apache/poi/hssf/record/ExtSSTRecord.java | 26 +- .../apache/poi/hssf/record/IndexRecord.java | 7 + .../poi/hssf/record/SSTDeserializer.java | 1 - .../org/apache/poi/hssf/record/SSTRecord.java | 5 +- .../apache/poi/hssf/record/SSTSerializer.java | 7 +- .../aggregates/RowRecordsAggregate.java | 113 +++++++- .../aggregates/ValueRecordsAggregate.java | 93 +++--- 10 files changed, 290 insertions(+), 242 deletions(-) diff --git a/src/java/org/apache/poi/hssf/model/Sheet.java b/src/java/org/apache/poi/hssf/model/Sheet.java index d596cc9ac..456606bc2 100644 --- a/src/java/org/apache/poi/hssf/model/Sheet.java +++ b/src/java/org/apache/poi/hssf/model/Sheet.java @@ -288,6 +288,15 @@ public class Sheet implements Model { retval.windowTwo = (WindowTwoRecord) rec; } + else if ( rec.getSid() == DBCellRecord.sid ) + { + rec = null; + } + else if ( rec.getSid() == IndexRecord.sid ) + { + rec = null; + } + if (rec != null) { @@ -700,49 +709,6 @@ public class Sheet implements Model return preoffset; } - /** - * Serializes all records in the sheet into one big byte array. Use this to write - * the sheet out. - * - * @return byte[] array containing the binary representation of the records in this sheet - * - */ - - public byte [] serialize() - { - log.log(log.DEBUG, "Sheet.serialize"); - - // addDBCellRecords(); - byte[] retval = null; - - // ArrayList bytes = new ArrayList(4096); - int arraysize = getSize(); - int pos = 0; - - // for (int k = 0; k < records.size(); k++) - // { - // bytes.add((( Record ) records.get(k)).serialize()); - // - // } - // for (int k = 0; k < bytes.size(); k++) - // { - // arraysize += (( byte [] ) bytes.get(k)).length; - // log.debug((new StringBuffer("arraysize=")).append(arraysize) - // .toString()); - // } - retval = new byte[ arraysize ]; - for (int k = 0; k < records.size(); k++) - { - - // byte[] rec = (( byte [] ) bytes.get(k)); - // System.arraycopy(rec, 0, retval, pos, rec.length); - pos += (( Record ) records.get(k)).serialize(pos, - retval); // rec.length; - } - log.log(log.DEBUG, "Sheet.serialize returning " + retval); - return retval; - } - /** * Serializes all records in the sheet into one big byte array. Use this to write * the sheet out. @@ -756,40 +722,69 @@ public class Sheet implements Model { log.log(log.DEBUG, "Sheet.serialize using offsets"); - // addDBCellRecords(); - // ArrayList bytes = new ArrayList(4096); - // int arraysize = getSize(); // 0; - int pos = 0; - - // for (int k = 0; k < records.size(); k++) - // { - // bytes.add((( Record ) records.get(k)).serialize()); - // - // } - // for (int k = 0; k < bytes.size(); k++) - // { - // arraysize += (( byte [] ) bytes.get(k)).length; - // log.debug((new StringBuffer("arraysize=")).append(arraysize) - // .toString()); - // } + int pos = offset; + boolean haveSerializedIndex = false; for (int k = 0; k < records.size(); k++) { -// byte[] rec = (( byte [] ) bytes.get(k)); - // System.arraycopy(rec, 0, data, offset + pos, rec.length); Record record = (( Record ) records.get(k)); + int startPos = pos; + //Once the rows have been found in the list of records, start + //writing out the blocked row information. This includes the DBCell references + if (record instanceof RowRecordsAggregate) { + pos += ((RowRecordsAggregate)record).serialize(pos, data, cells); // rec.length; + } else if (record instanceof ValueRecordsAggregate) { + //Do nothing here. The records were serialized during the RowRecordAggregate block serialization + } else { + pos += record.serialize(pos, data ); // rec.length; + } - //uncomment to test record sizes -// byte[] data2 = new byte[record.getRecordSize()]; -// record.serialize(0, data2 ); // rec.length; -// if (LittleEndian.getUShort(data2, 2) != record.getRecordSize() - 4 -// && record instanceof RowRecordsAggregate == false && record instanceof ValueRecordsAggregate == false) -// throw new RuntimeException("Blah!!!"); - - pos += record.serialize(pos + offset, data ); // rec.length; - + //If the BOF record was just serialized then add the IndexRecord + if (record.getSid() == BOFRecord.sid) { + //Can there be more than one BOF for a sheet? If not then we can + //remove this guard. So be safe it is left here. + if (!haveSerializedIndex) { + haveSerializedIndex = true; + pos += serializeIndexRecord(k, pos, data); + } + } } log.log(log.DEBUG, "Sheet.serialize returning "); - return pos; + return pos-offset; + } + + private int serializeIndexRecord(final int BOFRecordIndex, final int offset, byte[] data) { + IndexRecord index = new IndexRecord(); + index.setFirstRow(rows.getFirstRowNum()); + index.setLastRowAdd1(rows.getLastRowNum()+1); + //Calculate the size of the records from the end of the BOF + //and up to the RowRecordsAggregate... + int sheetRecSize = 0; + for (int j = BOFRecordIndex+1; j < records.size(); j++) + { + Record tmpRec = (( Record ) records.get(j)); + if (tmpRec instanceof RowRecordsAggregate) + break; + sheetRecSize+= tmpRec.getRecordSize(); + } + //Add the references to the DBCells in the IndexRecord (one for each block) + int blockCount = rows.getRowBlockCount(); + //Calculate the size of this IndexRecord + int indexRecSize = index.getRecordSizeForBlockCount(blockCount); + + int rowBlockOffset = 0; + int cellBlockOffset = 0; + int dbCellOffset = 0; + for (int block=0;block + * Title: DBCell Record + * Description: Used by Excel and other MS apps to quickly find rows in the sheets.

* REFERENCE: PG 299/440 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)

* @author Andrew C. Oliver (acoliver at apache dot org) + * @author Jason Height * @version 2.0-pre */ public class DBCellRecord extends Record { + public final static int BLOCK_SIZE = 32; public final static short sid = 0xd7; private int field_1_row_offset; private short[] field_2_cell_offsets; @@ -217,7 +219,7 @@ public class DBCellRecord LittleEndian.putInt(data, 4 + offset, getRowOffset()); for (int k = 0; k < getNumCellOffsets(); k++) { - LittleEndian.putShort(data, 8 + k + offset, getCellOffsetAt(k)); + LittleEndian.putShort(data, 8 + 2*k + offset, getCellOffsetAt(k)); } return getRecordSize(); } @@ -227,6 +229,11 @@ public class DBCellRecord return 8 + (getNumCellOffsets() * 2); } + /** Returns the size of a DBCellRecord when it needs to reference a certain number of rows*/ + public static int getRecordSizeForRows(int rows) { + return 8 + (rows * 2); + } + public short getSid() { return this.sid; diff --git a/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java b/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java index 6af6ec3b9..da1d2178d 100644 --- a/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java +++ b/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java @@ -65,8 +65,8 @@ import org.apache.poi.util.LittleEndian; /** * Extended SST table info subrecord

* contains the elements of "info" in the SST's array field

- * WE HAVE VERY LITTLE INFORMATION ON HOW TO IMPLEMENT THIS RECORD! (EXTSSST)

* @author Andrew C. Oliver (acoliver at apache dot org) + * @author Jason Height * @version 2.0-pre * @see org.apache.poi.hssf.record.ExtSSTRecord */ diff --git a/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java b/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java index e825987c3..51ce742e2 100644 --- a/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java +++ b/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java @@ -61,13 +61,13 @@ import java.util.ArrayList; /** * Title: Extended Static String Table

- * Description: I really don't understand this thing... its supposed to be "a hash - * table for optimizing external copy operations" -- - *

- * This sounds like a job for Marc "BitMaster" Johnson aka the - * "Hawaiian Master Chef".

+ * Description: This record is used for a quick lookup into the SST record. This + * record breaks the SST table into a set of buckets. The offsets + * to these buckets within the SST record are kept as well as the + * position relative to the start of the SST record. * REFERENCE: PG 313 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)

* @author Andrew C. Oliver (acoliver at apache dot org) + * @author Jason Height * @version 2.0-pre * @see org.apache.poi.hssf.record.ExtSSTInfoSubRecord */ @@ -75,8 +75,9 @@ import java.util.ArrayList; public class ExtSSTRecord extends Record { + private static final int DEFAULT_BUCKET_SIZE = 8; public final static short sid = 0xff; - private short field_1_strings_per_bucket; + private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE; private ArrayList field_2_sst_info; @@ -120,12 +121,11 @@ public class ExtSSTRecord } } - // this probably doesn't work but we don't really care at this point protected void fillFields(byte [] data, short size, int offset) { field_2_sst_info = new ArrayList(); field_1_strings_per_bucket = LittleEndian.getShort(data, 0 + offset); - for (int k = 2; k < ((data.length - offset) - size); k += 8) + for (int k = 2; k < (size-offset); k += 8) { byte[] tempdata = new byte[ 8 + offset ]; @@ -196,16 +196,16 @@ public class ExtSSTRecord for (int k = 0; k < getNumInfoRecords(); k++) { - System.arraycopy(getInfoRecordAt(k).serialize(), 0, data, - pos + offset, 8); - pos += getInfoRecordAt(k).getRecordSize(); + ExtSSTInfoSubRecord rec = getInfoRecordAt(k); + pos += rec.serialize(pos + offset, data); } - return getRecordSize(); + + return pos; } public int getRecordSize() { - return 4 + 2 + field_2_sst_info.size() * 8; + return 6+8*getNumInfoRecords(); } public short getSid() diff --git a/src/java/org/apache/poi/hssf/record/IndexRecord.java b/src/java/org/apache/poi/hssf/record/IndexRecord.java index 836e07e09..e38382249 100644 --- a/src/java/org/apache/poi/hssf/record/IndexRecord.java +++ b/src/java/org/apache/poi/hssf/record/IndexRecord.java @@ -222,6 +222,13 @@ public class IndexRecord return 20 + (getNumDbcells() * 4); } + /** Returns the size of an INdexRecord when it needs to index the specified number of blocks + * + */ + public static int getRecordSizeForBlockCount(int blockCount) { + return 20 + (4 * blockCount); + } + public short getSid() { return this.sid; diff --git a/src/java/org/apache/poi/hssf/record/SSTDeserializer.java b/src/java/org/apache/poi/hssf/record/SSTDeserializer.java index dcf4e50b9..9d140c516 100644 --- a/src/java/org/apache/poi/hssf/record/SSTDeserializer.java +++ b/src/java/org/apache/poi/hssf/record/SSTDeserializer.java @@ -249,7 +249,6 @@ class SSTDeserializer */ static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string ) { - if ( string.isRichText() ) string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) ); if ( string.isExtendedText() ) diff --git a/src/java/org/apache/poi/hssf/record/SSTRecord.java b/src/java/org/apache/poi/hssf/record/SSTRecord.java index 9cd941121..32c3842ab 100644 --- a/src/java/org/apache/poi/hssf/record/SSTRecord.java +++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java @@ -586,7 +586,10 @@ public class SSTRecord */ public int calcExtSSTRecordSize() { - return 4 + 2 + ((field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE) + 1) * 8; + int infoRecs = (field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE); + if ((field_3_strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0) + infoRecs ++; + return 4 + 2 + (infoRecs * 8); } } diff --git a/src/java/org/apache/poi/hssf/record/SSTSerializer.java b/src/java/org/apache/poi/hssf/record/SSTSerializer.java index 69e7af87d..905770b2c 100644 --- a/src/java/org/apache/poi/hssf/record/SSTSerializer.java +++ b/src/java/org/apache/poi/hssf/record/SSTSerializer.java @@ -93,8 +93,11 @@ class SSTSerializer this.numUniqueStrings = numUniqueStrings; this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings ); - this.bucketAbsoluteOffsets = new int[strings.size()/DEFAULT_BUCKET_SIZE+1]; - this.bucketRelativeOffsets = new int[strings.size()/DEFAULT_BUCKET_SIZE+1]; + int infoRecs = (strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE); + if ((strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0) + infoRecs ++; + this.bucketAbsoluteOffsets = new int[infoRecs]; + this.bucketRelativeOffsets = new int[infoRecs]; } /** diff --git a/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java b/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java index 09587af81..1bd27c941 100644 --- a/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java +++ b/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java @@ -57,6 +57,7 @@ package org.apache.poi.hssf.record.aggregates; import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.RowRecord; +import org.apache.poi.hssf.record.DBCellRecord; import org.apache.poi.hssf.record.UnknownRecord; import java.util.Map; @@ -169,6 +170,86 @@ public class RowRecordsAggregate return k; } + /** Returns the number of row blocks. + *

The row blocks are goupings of rows that contain the DBCell record + * after them + */ + public int getRowBlockCount() { + int size = records.size()/DBCellRecord.BLOCK_SIZE; + if ((records.size() % DBCellRecord.BLOCK_SIZE) != 0) + size++; + return size; + } + + public int getRowBlockSize(int block) { + return 20 * getRowCountForBlock(block); + } + + /** Returns the number of physical rows within a block*/ + public int getRowCountForBlock(int block) { + int startIndex = block * DBCellRecord.BLOCK_SIZE; + int endIndex = startIndex + DBCellRecord.BLOCK_SIZE - 1; + if (endIndex >= records.size()) + endIndex = records.size()-1; + + return endIndex-startIndex+1; + } + + /** Returns the physical row number of the first row in a block*/ + public int getStartRowNumberForBlock(int block) { + //JMH Damn! I would like to directly index a record in the map rather than + //iterating through it. + int startIndex = block * DBCellRecord.BLOCK_SIZE; + Iterator rowIter = records.values().iterator(); + RowRecord row = null; + //Position the iterator at the start of the block + for (int i=0; i<=startIndex;i++) { + row = (RowRecord)rowIter.next(); + } + + return row.getRowNumber(); + } + + /** Returns the physical row number of the end row in a block*/ + public int getEndRowNumberForBlock(int block) { + //JMH Damn! I would like to directly index a record in the map rather than + //iterating through it. + int endIndex = ((block + 1)*DBCellRecord.BLOCK_SIZE)-1; + if (endIndex >= records.size()) + endIndex = records.size()-1; + + Iterator rowIter = records.values().iterator(); + RowRecord row = null; + for (int i=0; i<=endIndex;i++) { + row = (RowRecord)rowIter.next(); + } + return row.getRowNumber(); + } + + + /** Serializes a block of the rows */ + private int serializeRowBlock(final int block, final int offset, byte[] data) { + final int startIndex = block*DBCellRecord.BLOCK_SIZE; + final int endIndex = startIndex + DBCellRecord.BLOCK_SIZE; + + Iterator rowIterator = records.values().iterator(); + int pos = offset; + + //JMH TBD create an iterator that can start at a specific index. + int i=0; + for (;i=startRow) && (row <= endRow)) + size += ((Record)cell).getRecordSize(); + } + return size; + } + + /** Returns true if the row has cells attached to it */ + public boolean rowHasCells(int row) { + IntList ctRow = (IntList) celltype.get(row); + return ((ctRow != null) && (ctRow.size() > 0)); + } + + /** Serializes the cells that are allocated to a certain row range*/ + public int serializeCellRow(final int row, int offset, byte [] data) + { + Iterator itr = new VRAIterator(this, row); + int pos = offset; + + while (itr.hasNext()) + { + CellValueRecordInterface cell = (CellValueRecordInterface)itr.next(); + pos += (( Record ) cell).serialize(pos, data); + } + return pos - offset; + } + + public int construct(int offset, List records) { @@ -512,30 +533,33 @@ public class ValueRecordsAggregate class VRAIterator implements Iterator { private boolean hasNext; private ValueRecordsAggregate vra; - int popindex; - int row; - int rowlimit; - int col; + private int popindex; + private int row; + private int rowlimit; + private int col; CellValueRecordInterface current = null; CellValueRecordInterface next = null; public VRAIterator(ValueRecordsAggregate vra) { - this.vra = vra; - this.rowlimit = -1; - popindex = 0; - if (vra.getPhysicalNumberOfCells() > 0) { - hasNext = true; - next = findNextCell(null); - } + this(vra, 0, -1); } public VRAIterator(ValueRecordsAggregate vra, int row) { - this(vra); - rowlimit = row; - this.row = row; - this.popindex = vra.populatedRows.indexOf(row); + this(vra, row, row); } + public VRAIterator(ValueRecordsAggregate vra, int startRow, int endRow) { + this.vra = vra; + this.row = startRow; + this.rowlimit = endRow; + this.popindex = vra.populatedRows.indexOf(row); + if (vra.getPhysicalNumberOfCells() > 0) { + next = findNextCell(null); + hasNext = (next != null); + } + } + + public boolean hasNext() { return hasNext; } @@ -575,7 +599,7 @@ class VRAIterator implements Iterator { rowNum = vra.populatedRows.get(popindex); ctRow = (IntList)vra.celltype.get(rowNum); if (ctRow.size() == 0) { - if (rowlimit == -1) { + if ((rowlimit == -1)||(rowNum<=rowlimit)) { popindex++; } else { this.hasNext = false; @@ -592,8 +616,11 @@ class VRAIterator implements Iterator { colNum = newCol; if (colNum == -1) { //end of row, forward one row popindex++; - if (popindex < vra.populatedRows.size() && rowlimit == -1) { + if (popindex < vra.populatedRows.size() && ((rowlimit == -1)||(rowNum<=rowlimit))) { rowNum = vra.populatedRows.get(popindex); + //Return null if the row is out of range + if ((rowlimit != -1) &&( rowNum > rowlimit)) + return null; } else { return null; }