diff --git a/src/java/org/apache/poi/hssf/model/Sheet.java b/src/java/org/apache/poi/hssf/model/Sheet.java
index d596cc9ac..456606bc2 100644
--- a/src/java/org/apache/poi/hssf/model/Sheet.java
+++ b/src/java/org/apache/poi/hssf/model/Sheet.java
@@ -288,6 +288,15 @@ public class Sheet implements Model
             {
                 retval.windowTwo = (WindowTwoRecord) rec;
             }
+            else if ( rec.getSid() == DBCellRecord.sid )
+            {
+                rec = null;
+            }
+            else if ( rec.getSid() == IndexRecord.sid )
+            {
+                rec = null;
+            }
+
             if (rec != null)
             {
@@ -700,49 +709,6 @@ public class Sheet implements Model
         return preoffset;
     }
 
-    /**
-     * Serializes all records in the sheet into one big byte array. Use this to write
-     * the sheet out.
-     *
-     * @return byte[] array containing the binary representation of the records in this sheet
-     *
-     */
-
-    public byte [] serialize()
-    {
-        log.log(log.DEBUG, "Sheet.serialize");
-
-        // addDBCellRecords();
-        byte[] retval = null;
-
-        // ArrayList bytes = new ArrayList(4096);
-        int arraysize = getSize();
-        int pos       = 0;
-
-        // for (int k = 0; k < records.size(); k++)
-        // {
-        //     bytes.add((( Record ) records.get(k)).serialize());
-        //
-        // }
-        // for (int k = 0; k < bytes.size(); k++)
-        // {
-        //     arraysize += (( byte [] ) bytes.get(k)).length;
-        //     log.debug((new StringBuffer("arraysize=")).append(arraysize)
-        //         .toString());
-        // }
-        retval = new byte[ arraysize ];
-        for (int k = 0; k < records.size(); k++)
-        {
-
-            // byte[] rec = (( byte [] ) bytes.get(k));
-            // System.arraycopy(rec, 0, retval, pos, rec.length);
-            pos += (( Record ) records.get(k)).serialize(pos,
-                    retval);   // rec.length;
-        }
-        log.log(log.DEBUG, "Sheet.serialize returning " + retval);
-        return retval;
-    }
-
     /**
      * Serializes all records in the sheet into one big byte array. Use this to write
      * the sheet out.
@@ -756,40 +722,69 @@
     {
         log.log(log.DEBUG, "Sheet.serialize using offsets");
 
-        // addDBCellRecords();
-        // ArrayList bytes = new ArrayList(4096);
-        // int arraysize = getSize();   // 0;
-        int pos = 0;
-
-        // for (int k = 0; k < records.size(); k++)
-        // {
-        //     bytes.add((( Record ) records.get(k)).serialize());
-        //
-        // }
-        // for (int k = 0; k < bytes.size(); k++)
-        // {
-        //     arraysize += (( byte [] ) bytes.get(k)).length;
-        //     log.debug((new StringBuffer("arraysize=")).append(arraysize)
-        //         .toString());
-        // }
+        int pos = offset;
+        boolean haveSerializedIndex = false;
 
         for (int k = 0; k < records.size(); k++)
         {
-//            byte[] rec = (( byte [] ) bytes.get(k));
-            // System.arraycopy(rec, 0, data, offset + pos, rec.length);
             Record record = (( Record ) records.get(k));
+            int startPos = pos;
+            //Once the rows have been found in the list of records, start
+            //writing out the blocked row information. This includes the DBCell references
+            if (record instanceof RowRecordsAggregate) {
+              pos += ((RowRecordsAggregate)record).serialize(pos, data, cells);   // rec.length;
+            } else if (record instanceof ValueRecordsAggregate) {
+              //Do nothing here. The records were serialized during the RowRecordsAggregate block serialization
+            } else {
+              pos += record.serialize(pos, data );   // rec.length;
+            }
-            //uncomment to test record sizes
-//            byte[] data2 = new byte[record.getRecordSize()];
-//            record.serialize(0, data2 );   // rec.length;
-//            if (LittleEndian.getUShort(data2, 2) != record.getRecordSize() - 4
-//                && record instanceof RowRecordsAggregate == false && record instanceof ValueRecordsAggregate == false)
-//                throw new RuntimeException("Blah!!!");
-
-            pos += record.serialize(pos + offset, data );   // rec.length;
-
+            //If the BOF record was just serialized then add the IndexRecord
+            if (record.getSid() == BOFRecord.sid) {
+              //Can there be more than one BOF for a sheet? If not then we can
+              //remove this guard. To be safe it is left here.
+              if (!haveSerializedIndex) {
+                haveSerializedIndex = true;
+                pos += serializeIndexRecord(k, pos, data);
+              }
+            }
         }
         log.log(log.DEBUG, "Sheet.serialize returning ");
-        return pos;
+        return pos-offset;
+    }
+
+    private int serializeIndexRecord(final int BOFRecordIndex, final int offset, byte[] data) {
+      IndexRecord index = new IndexRecord();
+      index.setFirstRow(rows.getFirstRowNum());
+      index.setLastRowAdd1(rows.getLastRowNum()+1);
+      //Calculate the size of the records from the end of the BOF
+      //and up to the RowRecordsAggregate...
+      int sheetRecSize = 0;
+      for (int j = BOFRecordIndex+1; j < records.size(); j++)
+      {
+        Record tmpRec = (( Record ) records.get(j));
+        if (tmpRec instanceof RowRecordsAggregate)
+          break;
+        sheetRecSize+= tmpRec.getRecordSize();
+      }
+      //Add the references to the DBCells in the IndexRecord (one for each block)
+      int blockCount = rows.getRowBlockCount();
+      //Calculate the size of this IndexRecord
+      int indexRecSize = index.getRecordSizeForBlockCount(blockCount);
+
+      int rowBlockOffset = 0;
+      int cellBlockOffset = 0;
+      int dbCellOffset = 0;
+      for (int block=0;block<blockCount;block++) {
+        //This block's DBCell record sits after the INDEX record, the records
+        //between the INDEX and the first row, every row and cell block written
+        //so far (including this block) and the earlier DBCell records.
+        rowBlockOffset += rows.getRowBlockSize(block);
+        cellBlockOffset += cells.getRowCellBlockSize(rows.getStartRowNumberForBlock(block),
+                                                     rows.getEndRowNumberForBlock(block));
+        index.addDbcell(offset + indexRecSize + sheetRecSize + dbCellOffset +
+                        rowBlockOffset + cellBlockOffset);
+        //Add the size of this block's DBCell record (8 bytes plus 2 per row)
+        dbCellOffset += DBCellRecord.getRecordSizeForRows(rows.getRowCountForBlock(block));
+      }
+      return index.serialize(offset, data);
     }
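A minimal standalone sketch of the DBCELL offset arithmetic performed by serializeIndexRecord above: each block's DBCELL record lands after the INDEX record, the records between the INDEX and the first ROW, all row and cell blocks up to and including the current block, and every earlier DBCELL record. The class, method and parameter names here are illustrative assumptions, not part of the patch.

    // Hypothetical illustration only; mirrors the offset bookkeeping above.
    public final class IndexOffsetSketch {
        static int[] dbCellPositions(int indexStart, int indexSize, int interveningSize,
                                     int[] rowBlockSizes, int[] cellBlockSizes, int[] dbCellSizes) {
            int[] positions = new int[rowBlockSizes.length];
            int rowBytes = 0, cellBytes = 0, dbCellBytes = 0;
            for (int block = 0; block < rowBlockSizes.length; block++) {
                rowBytes  += rowBlockSizes[block];
                cellBytes += cellBlockSizes[block];
                // Stream position of this block's DBCELL record.
                positions[block] = indexStart + indexSize + interveningSize
                                 + rowBytes + cellBytes + dbCellBytes;
                dbCellBytes += dbCellSizes[block];
            }
            return positions;
        }
    }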

diff --git a/src/java/org/apache/poi/hssf/record/DBCellRecord.java b/src/java/org/apache/poi/hssf/record/DBCellRecord.java
--- a/src/java/org/apache/poi/hssf/record/DBCellRecord.java
+++ b/src/java/org/apache/poi/hssf/record/DBCellRecord.java
 /**
  * Title:        DBCell Record
  * Description:  Used by Excel and other MS apps to quickly find rows in the sheets.
  * REFERENCE:  PG 299/440 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)
  * @author Andrew C. Oliver (acoliver at apache dot org)
+ * @author Jason Height
  * @version 2.0-pre
  */
 public class DBCellRecord
     extends Record
 {
+    public final static int BLOCK_SIZE = 32;
     public final static short sid = 0xd7;
     private int field_1_row_offset;
     private short[] field_2_cell_offsets;
@@ -217,7 +219,7 @@
         LittleEndian.putInt(data, 4 + offset, getRowOffset());
         for (int k = 0; k < getNumCellOffsets(); k++)
         {
-            LittleEndian.putShort(data, 8 + k + offset, getCellOffsetAt(k));
+            LittleEndian.putShort(data, 8 + 2*k + offset, getCellOffsetAt(k));
         }
         return getRecordSize();
     }
@@ -227,6 +229,11 @@
         return 8 + (getNumCellOffsets() * 2);
     }
 
+    /** Returns the size of a DBCellRecord when it needs to reference a certain number of rows */
+    public static int getRecordSizeForRows(int rows) {
+      return 8 + (rows * 2);
+    }
+
     public short getSid()
     {
         return this.sid;
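For reference, a small sketch of the DBCELL byte layout behind the 8 + 2*k fix and getRecordSizeForRows above: bytes 0-3 are the record header (sid 0x00D7 plus length), bytes 4-7 the offset back to the block's first ROW record, then one 2-byte cell offset per row. The helper names are assumptions for illustration, not the patch's code.

    // Sketch only: why the record occupies 8 + 2*n bytes and the k-th offset
    // starts at byte 8 + 2*k from the beginning of the record.
    final class DBCellLayoutSketch {
        static int recordSize(int cellOffsetCount)           { return 8 + 2 * cellOffsetCount; }
        static int cellOffsetPosition(int recordStart, int k) { return recordStart + 8 + 2 * k; }
    }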

diff --git a/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java b/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java
index 6af6ec3b9..da1d2178d 100644
--- a/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java
+++ b/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java
@@ -65,8 +65,8 @@ import org.apache.poi.util.LittleEndian;
 /**
  * Extended SST table info subrecord
  * contains the elements of "info" in the SST's array field
- * WE HAVE VERY LITTLE INFORMATION ON HOW TO IMPLEMENT THIS RECORD! (EXTSSST)
  * @author Andrew C. Oliver (acoliver at apache dot org)
+ * @author Jason Height
  * @version 2.0-pre
  * @see org.apache.poi.hssf.record.ExtSSTRecord
  */

diff --git a/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java b/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java
index e825987c3..51ce742e2 100644
--- a/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java
+++ b/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java
@@ -61,13 +61,13 @@ import java.util.ArrayList;
 /**
  * Title:        Extended Static String Table
- * Description: I really don't understand this thing... its supposed to be "a hash
- *              table for optimizing external copy operations"  --
- *
- * This sounds like a job for Marc "BitMaster" Johnson aka the
- * "Hawaiian Master Chef".
+ * Description: This record is used for a quick lookup into the SST record. This
+ *              record breaks the SST table into a set of buckets. The offsets
+ *              to these buckets within the SST record are kept as well as the
+ *              position relative to the start of the SST record.
  * REFERENCE:  PG 313 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)
  * @author Andrew C. Oliver (acoliver at apache dot org)
+ * @author Jason Height
  * @version 2.0-pre
  * @see org.apache.poi.hssf.record.ExtSSTInfoSubRecord
  */
@@ -75,8 +75,9 @@ import java.util.ArrayList;
 public class ExtSSTRecord
     extends Record
 {
+    private static final int DEFAULT_BUCKET_SIZE = 8;
     public final static short sid = 0xff;
-    private short field_1_strings_per_bucket;
+    private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE;
     private ArrayList field_2_sst_info;
 
@@ -120,12 +121,11 @@
         }
     }
 
-    // this probably doesn't work but we don't really care at this point
     protected void fillFields(byte [] data, short size, int offset)
     {
         field_2_sst_info = new ArrayList();
         field_1_strings_per_bucket = LittleEndian.getShort(data, 0 + offset);
-        for (int k = 2; k < ((data.length - offset) - size); k += 8)
+        for (int k = 2; k < (size-offset); k += 8)
         {
             byte[] tempdata = new byte[ 8 + offset ];
 
@@ -196,16 +196,16 @@
         for (int k = 0; k < getNumInfoRecords(); k++)
         {
-            System.arraycopy(getInfoRecordAt(k).serialize(), 0, data,
-                             pos + offset, 8);
-            pos += getInfoRecordAt(k).getRecordSize();
+            ExtSSTInfoSubRecord rec = getInfoRecordAt(k);
+            pos += rec.serialize(pos + offset, data);
         }
-        return getRecordSize();
+
+        return pos;
     }
 
     public int getRecordSize()
     {
-        return 4 + 2 + field_2_sst_info.size() * 8;
+        return 6+8*getNumInfoRecords();
     }
 
     public short getSid()
     {
         return this.sid;
diff --git a/src/java/org/apache/poi/hssf/record/IndexRecord.java b/src/java/org/apache/poi/hssf/record/IndexRecord.java
index 836e07e09..e38382249 100644
--- a/src/java/org/apache/poi/hssf/record/IndexRecord.java
+++ b/src/java/org/apache/poi/hssf/record/IndexRecord.java
@@ -222,6 +222,13 @@
         return 20 + (getNumDbcells() * 4);
     }
 
+    /** Returns the size of an IndexRecord when it needs to index the specified number of blocks
+     *
+     */
+    public static int getRecordSizeForBlockCount(int blockCount) {
+      return 20 + (4 * blockCount);
+    }
+
     public short getSid()
     {
         return this.sid;
diff --git a/src/java/org/apache/poi/hssf/record/SSTDeserializer.java b/src/java/org/apache/poi/hssf/record/SSTDeserializer.java
index dcf4e50b9..9d140c516 100644
--- a/src/java/org/apache/poi/hssf/record/SSTDeserializer.java
+++ b/src/java/org/apache/poi/hssf/record/SSTDeserializer.java
@@ -249,7 +249,6 @@
      */
     static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )
     {
-
         if ( string.isRichText() )
             string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
         if ( string.isExtendedText() )
diff --git a/src/java/org/apache/poi/hssf/record/SSTRecord.java b/src/java/org/apache/poi/hssf/record/SSTRecord.java
index 9cd941121..32c3842ab 100644
--- a/src/java/org/apache/poi/hssf/record/SSTRecord.java
+++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java
@@ -586,7 +586,10 @@
      */
     public int calcExtSSTRecordSize()
     {
-        return 4 + 2 + ((field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE) + 1) * 8;
+      int infoRecs = (field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE);
+      if ((field_3_strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
+        infoRecs ++;
+      return 4 + 2 + (infoRecs * 8);
     }
 }
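The bucket count computed in calcExtSSTRecordSize above, and again in SSTSerializer below, is simply a ceiling division of the string count by the bucket size (SSTSerializer.DEFAULT_BUCKET_SIZE). An equivalent one-liner, for illustration only:

    // Same result as "divide, then add 1 if there is a remainder".
    final class BucketCountSketch {
        static int bucketCount(int stringCount, int bucketSize) {
            return (stringCount + bucketSize - 1) / bucketSize;
        }
    }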
diff --git a/src/java/org/apache/poi/hssf/record/SSTSerializer.java b/src/java/org/apache/poi/hssf/record/SSTSerializer.java
index 69e7af87d..905770b2c 100644
--- a/src/java/org/apache/poi/hssf/record/SSTSerializer.java
+++ b/src/java/org/apache/poi/hssf/record/SSTSerializer.java
@@ -93,8 +93,11 @@ class SSTSerializer
         this.numUniqueStrings = numUniqueStrings;
         this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
-        this.bucketAbsoluteOffsets = new int[strings.size()/DEFAULT_BUCKET_SIZE+1];
-        this.bucketRelativeOffsets = new int[strings.size()/DEFAULT_BUCKET_SIZE+1];
+        int infoRecs = (strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE);
+        if ((strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
+          infoRecs ++;
+        this.bucketAbsoluteOffsets = new int[infoRecs];
+        this.bucketRelativeOffsets = new int[infoRecs];
     }
 
     /**
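The block helpers added to RowRecordsAggregate below all derive from the same 32-row grouping (DBCellRecord.BLOCK_SIZE). A small illustrative sketch of the boundary arithmetic, assuming a simple physicalRowCount input rather than the aggregate's internal map:

    // Illustration only: block count and rows-per-block for 32-row blocks.
    final class RowBlockSketch {
        static int blockCount(int physicalRowCount) { return (physicalRowCount + 31) / 32; }
        static int rowsInBlock(int block, int physicalRowCount) {
            int start = block * 32;
            int end   = Math.min(start + 32, physicalRowCount);  // exclusive end index
            return Math.max(0, end - start);
        }
    }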
diff --git a/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java b/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java
index 09587af81..1bd27c941 100644
--- a/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java
+++ b/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java
@@ -57,6 +57,7 @@ package org.apache.poi.hssf.record.aggregates;
 import org.apache.poi.hssf.record.Record;
 import org.apache.poi.hssf.record.RowRecord;
+import org.apache.poi.hssf.record.DBCellRecord;
 import org.apache.poi.hssf.record.UnknownRecord;
 
 import java.util.Map;
@@ -169,6 +170,86 @@
         return k;
     }
 
+    /** Returns the number of row blocks.
+     *  The row blocks are groupings of rows that contain the DBCell record
+     *  after them
+     */
+    public int getRowBlockCount() {
+      int size = records.size()/DBCellRecord.BLOCK_SIZE;
+      if ((records.size() % DBCellRecord.BLOCK_SIZE) != 0)
+        size++;
+      return size;
+    }
+
+    /** Returns the size in bytes of the row records in a block (each ROW record is 20 bytes) */
+    public int getRowBlockSize(int block) {
+      return 20 * getRowCountForBlock(block);
+    }
+
+    /** Returns the number of physical rows within a block */
+    public int getRowCountForBlock(int block) {
+      int startIndex = block * DBCellRecord.BLOCK_SIZE;
+      int endIndex = startIndex + DBCellRecord.BLOCK_SIZE - 1;
+      if (endIndex >= records.size())
+        endIndex = records.size()-1;
+
+      return endIndex-startIndex+1;
+    }
+
+    /** Returns the physical row number of the first row in a block */
+    public int getStartRowNumberForBlock(int block) {
+      //JMH Damn! I would like to directly index a record in the map rather than
+      //iterating through it.
+      int startIndex = block * DBCellRecord.BLOCK_SIZE;
+      Iterator rowIter = records.values().iterator();
+      RowRecord row = null;
+      //Position the iterator at the start of the block
+      for (int i=0; i<=startIndex;i++) {
+        row = (RowRecord)rowIter.next();
+      }
+
+      return row.getRowNumber();
+    }
+
+    /** Returns the physical row number of the end row in a block */
+    public int getEndRowNumberForBlock(int block) {
+      //JMH Damn! I would like to directly index a record in the map rather than
+      //iterating through it.
+      int endIndex = ((block + 1)*DBCellRecord.BLOCK_SIZE)-1;
+      if (endIndex >= records.size())
+        endIndex = records.size()-1;
+
+      Iterator rowIter = records.values().iterator();
+      RowRecord row = null;
+      for (int i=0; i<=endIndex;i++) {
+        row = (RowRecord)rowIter.next();
+      }
+      return row.getRowNumber();
+    }
+
+
+    /** Serializes a block of the rows */
+    private int serializeRowBlock(final int block, final int offset, byte[] data) {
+      final int startIndex = block*DBCellRecord.BLOCK_SIZE;
+      final int endIndex = startIndex + DBCellRecord.BLOCK_SIZE;
+
+      Iterator rowIterator = records.values().iterator();
+      int pos = offset;
+
+      //JMH TBD create an iterator that can start at a specific index.
+      int i=0;
+      for (;i<startIndex;i++)
+        rowIterator.next();
+      while (rowIterator.hasNext() && (i++ < endIndex)) {
+        RowRecord row = (RowRecord)rowIterator.next();
+        pos += row.serialize(pos, data);
+      }
+      return pos - offset;
+    }
+
diff --git a/src/java/org/apache/poi/hssf/record/aggregates/ValueRecordsAggregate.java b/src/java/org/apache/poi/hssf/record/aggregates/ValueRecordsAggregate.java
--- a/src/java/org/apache/poi/hssf/record/aggregates/ValueRecordsAggregate.java
+++ b/src/java/org/apache/poi/hssf/record/aggregates/ValueRecordsAggregate.java
+    /** Returns the size of the cell records attached to the rows in the range specified */
+    public int getRowCellBlockSize(int startRow, int endRow) {
+      Iterator itr = new VRAIterator(this, startRow, endRow);
+      int size = 0;
+      while (itr.hasNext()) {
+        CellValueRecordInterface cell = (CellValueRecordInterface)itr.next();
+        int row = cell.getRow();
+        if ((row >=startRow) && (row <= endRow))
+          size += ((Record)cell).getRecordSize();
+      }
+      return size;
+    }
+
+    /** Returns true if the row has cells attached to it */
+    public boolean rowHasCells(int row) {
+      IntList ctRow = (IntList) celltype.get(row);
+      return ((ctRow != null) && (ctRow.size() > 0));
+    }
+
+    /** Serializes the cells that are allocated to a certain row range */
+    public int serializeCellRow(final int row, int offset, byte [] data)
+    {
+      Iterator itr = new VRAIterator(this, row);
+      int pos = offset;
+
+      while (itr.hasNext())
+      {
+        CellValueRecordInterface cell = (CellValueRecordInterface)itr.next();
+        pos += (( Record ) cell).serialize(pos, data);
+      }
+      return pos - offset;
+    }
+
     public int construct(int offset, List records)
     {
@@ -512,30 +533,33 @@ public class ValueRecordsAggregate
 class VRAIterator implements Iterator {
     private boolean hasNext;
     private ValueRecordsAggregate vra;
-    int popindex;
-    int row;
-    int rowlimit;
-    int col;
+    private int popindex;
+    private int row;
+    private int rowlimit;
+    private int col;
     CellValueRecordInterface current = null;
     CellValueRecordInterface next = null;
 
     public VRAIterator(ValueRecordsAggregate vra) {
-        this.vra = vra;
-        this.rowlimit = -1;
-        popindex = 0;
-        if (vra.getPhysicalNumberOfCells() > 0) {
-            hasNext = true;
-            next = findNextCell(null);
-        }
+        this(vra, 0, -1);
     }
 
     public VRAIterator(ValueRecordsAggregate vra, int row) {
-        this(vra);
-        rowlimit = row;
-        this.row = row;
-        this.popindex = vra.populatedRows.indexOf(row);
+        this(vra, row, row);
     }
 
+    public VRAIterator(ValueRecordsAggregate vra, int startRow, int endRow) {
+        this.vra = vra;
+        this.row = startRow;
+        this.rowlimit = endRow;
+        this.popindex = vra.populatedRows.indexOf(row);
+        if (vra.getPhysicalNumberOfCells() > 0) {
+            next = findNextCell(null);
+            hasNext = (next != null);
+        }
+    }
+
     public boolean hasNext() {
         return hasNext;
     }
@@ -575,7 +599,7 @@
                 rowNum = vra.populatedRows.get(popindex);
                 ctRow = (IntList)vra.celltype.get(rowNum);
                 if (ctRow.size() == 0) {
-                    if (rowlimit == -1) {
+                    if ((rowlimit == -1)||(rowNum<=rowlimit)) {
                         popindex++;
                     } else {
                         this.hasNext = false;
@@ -592,8 +616,11 @@
             colNum = newCol;
             if (colNum == -1) { //end of row, forward one row
                 popindex++;
-                if (popindex < vra.populatedRows.size() && rowlimit == -1) {
+                if (popindex < vra.populatedRows.size() && ((rowlimit == -1)||(rowNum<=rowlimit))) {
                     rowNum = vra.populatedRows.get(popindex);
+                    //Return null if the row is out of range
+                    if ((rowlimit != -1) &&( rowNum > rowlimit))
+                        return null;
                 } else {
                     return null;
                 }
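Sheet.serialize above delegates to a RowRecordsAggregate.serialize(int, byte[], ValueRecordsAggregate) overload that writes each 32-row block, then that block's cell records, then the trailing DBCELL record. A sketch of how such an overload could be structured with the helpers added in this patch; it assumes DBCellRecord's addCellOffset/setRowOffset setters and is an illustration, not the patch's actual method body:

    // Illustrative sketch only: interleave row blocks, cell rows and DBCELL records.
    public int serialize(int offset, byte[] data, ValueRecordsAggregate cells) {
        int pos = offset;
        for (int block = 0; block < getRowBlockCount(); block++) {
            int rowBlockStart = pos;
            int rowBlockSize  = serializeRowBlock(block, pos, data);
            pos += rowBlockSize;
            DBCellRecord dbCell = new DBCellRecord();
            int cellRefOffset = rowBlockSize - 20;            // from the 2nd ROW record to the first cell
            for (int row = getStartRowNumberForBlock(block); row <= getEndRowNumberForBlock(block); row++) {
                if (cells != null && cells.rowHasCells(row)) {
                    int rowCellSize = cells.serializeCellRow(row, pos, data);
                    pos += rowCellSize;
                    dbCell.addCellOffset((short) cellRefOffset);
                    cellRefOffset = rowCellSize;
                }
            }
            dbCell.setRowOffset(pos - rowBlockStart);          // back-pointer to this block's first ROW
            pos += dbCell.serialize(pos, data);
        }
        return pos - offset;
    }

Because the DBCELL for a block is written only after that block's rows and cells, serializeIndexRecord in Sheet.java can predict each DBCELL position from the block sizes alone before anything is actually serialized.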