Introduced ContinuableRecord to help fix serialization of StringRecords with large data. Fixed TextObjectRecord to only write 16bit unicode when needed. Simplification in UnicodeString.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@711749 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-11-06 01:12:41 +00:00
parent 603bd64f73
commit bcd1a9a1c5
19 changed files with 827 additions and 959 deletions

View File

@ -379,8 +379,6 @@ public final class RecordFactory {
records.add(record); records.add(record);
} else if (lastRecord instanceof DrawingGroupRecord) { } else if (lastRecord instanceof DrawingGroupRecord) {
((DrawingGroupRecord)lastRecord).processContinueRecord(contRec.getData()); ((DrawingGroupRecord)lastRecord).processContinueRecord(contRec.getData());
} else if (lastRecord instanceof StringRecord) {
((StringRecord)lastRecord).processContinueRecord(contRec.getData());
} else if (lastRecord instanceof UnknownRecord) { } else if (lastRecord instanceof UnknownRecord) {
//Gracefully handle records that we don't know about, //Gracefully handle records that we don't know about,
//that happen to be continued //that happen to be continued

View File

@ -17,14 +17,16 @@
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import java.util.Iterator;
import org.apache.poi.hssf.record.cont.ContinuableRecord;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.IntMapper; import org.apache.poi.util.IntMapper;
import org.apache.poi.util.LittleEndianConsts; import org.apache.poi.util.LittleEndianConsts;
import java.util.Iterator;
/** /**
* Title: Static String Table Record * Title: Static String Table Record (0x00FC)<p/>
* <P> *
* Description: This holds all the strings for LabelSSTRecords. * Description: This holds all the strings for LabelSSTRecords.
* <P> * <P>
* REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN: * REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
@ -37,27 +39,20 @@ import java.util.Iterator;
* @see org.apache.poi.hssf.record.LabelSSTRecord * @see org.apache.poi.hssf.record.LabelSSTRecord
* @see org.apache.poi.hssf.record.ContinueRecord * @see org.apache.poi.hssf.record.ContinueRecord
*/ */
public final class SSTRecord extends Record { public final class SSTRecord extends ContinuableRecord {
public static final short sid = 0x00FC; public static final short sid = 0x00FC;
private static UnicodeString EMPTY_STRING = new UnicodeString(""); private static final UnicodeString EMPTY_STRING = new UnicodeString("");
/** how big can an SST record be? As big as any record can be: 8228 bytes */
static final int MAX_RECORD_SIZE = 8228;
// TODO - move these constants to test class (the only consumer)
/** standard record overhead: two shorts (record id plus data space size)*/ /** standard record overhead: two shorts (record id plus data space size)*/
static final int STD_RECORD_OVERHEAD = static final int STD_RECORD_OVERHEAD = 2 * LittleEndianConsts.SHORT_SIZE;
2 * LittleEndianConsts.SHORT_SIZE;
/** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */ /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
static final int SST_RECORD_OVERHEAD = static final int SST_RECORD_OVERHEAD = STD_RECORD_OVERHEAD + 2 * LittleEndianConsts.INT_SIZE;
( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
/** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */ /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD; static final int MAX_DATA_SPACE = RecordInputStream.MAX_RECORD_DATA_SIZE - 8;
/** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
/** union of strings in the SST and EXTSST */ /** union of strings in the SST and EXTSST */
private int field_1_num_strings; private int field_1_num_strings;
@ -133,37 +128,6 @@ public final class SSTRecord extends Record {
return field_2_num_unique_strings; return field_2_num_unique_strings;
} }
/**
* USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
* METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
* ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
* WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
* WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
*
* @param count number of strings
*
*/
public void setNumStrings( final int count )
{
field_1_num_strings = count;
}
/**
* USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
* METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
* EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
* IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
* THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
* THE RECORD
*
* @param count number of strings
*/
public void setNumUniqueStrings( final int count )
{
field_2_num_unique_strings = count;
}
/** /**
* Get a particular string by its index * Get a particular string by its index
@ -178,11 +142,6 @@ public final class SSTRecord extends Record {
return (UnicodeString) field_3_strings.get( id ); return (UnicodeString) field_3_strings.get( id );
} }
public boolean isString16bit( final int id )
{
UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( id ) );
return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
}
/** /**
* Return a debugging string representation * Return a debugging string representation
@ -350,29 +309,11 @@ public final class SSTRecord extends Record {
return field_3_strings.size(); return field_3_strings.size();
} }
/** protected void serialize(ContinuableRecordOutput out) {
* called by the class that is responsible for writing this sucker. SSTSerializer serializer = new SSTSerializer(field_3_strings, getNumStrings(), getNumUniqueStrings() );
* Subclasses should implement this so that their data is passed back in a serializer.serialize(out);
* byte array.
*
* @return size
*/
public int serialize( int offset, byte[] data )
{
SSTSerializer serializer = new SSTSerializer(
field_3_strings, getNumStrings(), getNumUniqueStrings() );
int bytes = serializer.serialize( offset, data );
bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets(); bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
bucketRelativeOffsets = serializer.getBucketRelativeOffsets(); bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
return bytes;
}
protected int getDataSize() {
SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(field_3_strings);
int recordSize = calculator.getRecordSize();
return recordSize-4;
} }
SSTDeserializer getDeserializer() SSTDeserializer getDeserializer()

View File

@ -1,76 +0,0 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
/**
 * Writes the leading fields of an SST record into a byte buffer: the record
 * sid, a reserved slot for the record length, the total string count and the
 * unique string count.
 *
 * @author Glen Stampoultzis (glens at apache.org)
 */
class SSTRecordHeader
{
    int numStrings;
    int numUniqueStrings;

    /**
     * @param numStrings       total number of strings, written as the first int of the header
     * @param numUniqueStrings number of unique strings, written as the second int of the header
     */
    public SSTRecordHeader( int numStrings, int numUniqueStrings )
    {
        this.numStrings = numStrings;
        this.numUniqueStrings = numUniqueStrings;
    }

    /**
     * Writes the SST header starting at <code>bufferIndex</code>. The layout is:
     * sid (short), record length (short, left unwritten here), number of strings
     * (int), number of unique strings (int).
     * <p>
     * For every field, <code>stats.recordSize</code> is increased and
     * <code>stats.remainingSize</code> is decreased by the width of that field.
     * The position of the length slot is stored in <code>stats.lastLengthPos</code>
     * so that the length can be filled in later, once it is known.
     *
     * @param stats       running size bookkeeping, updated as described above
     * @param data        the buffer to write the header into
     * @param bufferIndex the index in <code>data</code> where the header starts
     * @param recSize     not used by this method
     *
     * @return the number of bytes occupied by the header (including the unwritten length slot)
     */
    public int writeSSTHeader( UnicodeString.UnicodeRecordStats stats, byte[] data, int bufferIndex, int recSize )
    {
        int cursor = bufferIndex;

        // record id
        LittleEndian.putShort( data, cursor, SSTRecord.sid );
        cursor = advance( stats, cursor, LittleEndianConsts.SHORT_SIZE );

        // record length: only remember where it goes, nothing is written yet
        stats.lastLengthPos = cursor;
        cursor = advance( stats, cursor, LittleEndianConsts.SHORT_SIZE );

        // total string count
        LittleEndian.putInt( data, cursor, numStrings );
        cursor = advance( stats, cursor, LittleEndianConsts.INT_SIZE );

        // unique string count
        LittleEndian.putInt( data, cursor, numUniqueStrings );
        cursor = advance( stats, cursor, LittleEndianConsts.INT_SIZE );

        return cursor - bufferIndex;
    }

    /**
     * Accounts for one header field of the given width in <code>stats</code>.
     *
     * @return <code>position</code> moved forward by <code>width</code> bytes
     */
    private static int advance( UnicodeString.UnicodeRecordStats stats, int position, int width )
    {
        int next = position + width;
        stats.recordSize += width;
        stats.remainingSize -= width;
        return next;
    }
}

View File

@ -1,51 +0,0 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import org.apache.poi.util.IntMapper;
/**
 * Calculates the size of an SST record for a given set of strings.
 * <p>
 * The logic here parallels the SST serialization code, which is an
 * acknowledged duplication; the original authors noted that the SST
 * serialization code in general needs to be rewritten.
 *
 * @author Glen Stampoultzis (glens at apache.org)
 * @author Jason Height (jheight at apache.org)
 */
class SSTRecordSizeCalculator
{
    private IntMapper strings;

    /**
     * @param strings the strings whose combined record size is to be calculated
     */
    public SSTRecordSizeCalculator(IntMapper strings)
    {
        this.strings = strings;
    }

    /**
     * Starts from the fixed SST record overhead and then lets every string add
     * its own contribution to a shared stats object.
     *
     * @return the accumulated <code>recordSize</code> after all strings have been visited
     */
    public int getRecordSize() {
        UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats();

        // the SST header takes up space before any string is written
        stats.remainingSize -= SSTRecord.SST_RECORD_OVERHEAD;
        stats.recordSize += SSTRecord.SST_RECORD_OVERHEAD;

        int index = 0;
        while (index < strings.size()) {
            UnicodeString current = (UnicodeString) strings.get(index);
            // NOTE(review): presumably this also accounts for continue-record
            // overhead when a string crosses a record boundary - confirm in UnicodeString
            current.getRecordSize(stats);
            index++;
        }
        return stats.recordSize;
    }
}

View File

@ -1,4 +1,3 @@
/* ==================================================================== /* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with contributor license agreements. See the NOTICE file distributed with
@ -15,12 +14,11 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==================================================================== */ ==================================================================== */
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.IntMapper; import org.apache.poi.util.IntMapper;
import org.apache.poi.util.LittleEndian;
/** /**
* This class handles serialization of SST records. It utilizes the record processor * This class handles serialization of SST records. It utilizes the record processor
@ -28,71 +26,50 @@ import org.apache.poi.util.LittleEndian;
* *
* @author Glen Stampoultzis (glens at apache.org) * @author Glen Stampoultzis (glens at apache.org)
*/ */
class SSTSerializer final class SSTSerializer {
{
// todo: make private again private final int _numStrings;
private IntMapper strings; private final int _numUniqueStrings;
private SSTRecordHeader sstRecordHeader; private final IntMapper strings;
/** Offsets from the beginning of the SST record (even across continuations) */ /** Offsets from the beginning of the SST record (even across continuations) */
int[] bucketAbsoluteOffsets; private final int[] bucketAbsoluteOffsets;
/** Offsets relative the start of the current SST or continue record */ /** Offsets relative the start of the current SST or continue record */
int[] bucketRelativeOffsets; private final int[] bucketRelativeOffsets;
int startOfSST, startOfRecord; int startOfSST, startOfRecord;
public SSTSerializer( IntMapper strings, int numStrings, int numUniqueStrings ) public SSTSerializer( IntMapper strings, int numStrings, int numUniqueStrings )
{ {
this.strings = strings; this.strings = strings;
this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings ); _numStrings = numStrings;
_numUniqueStrings = numUniqueStrings;
int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size()); int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size());
this.bucketAbsoluteOffsets = new int[infoRecs]; this.bucketAbsoluteOffsets = new int[infoRecs];
this.bucketRelativeOffsets = new int[infoRecs]; this.bucketRelativeOffsets = new int[infoRecs];
} }
/** public void serialize(ContinuableRecordOutput out) {
* Create a byte array consisting of an SST record and any out.writeInt(_numStrings);
* required Continue records, ready to be written out. out.writeInt(_numUniqueStrings);
* <p>
* If an SST record and any subsequent Continue records are read
* in to create this instance, this method should produce a byte
* array that is identical to the byte array produced by
* concatenating the input records' data.
*
* @return the byte array
*/
public int serialize(int offset, byte[] data )
{
UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats();
sstRecordHeader.writeSSTHeader( stats, data, 0 + offset, 0 );
int pos = offset + SSTRecord.SST_RECORD_OVERHEAD;
for ( int k = 0; k < strings.size(); k++ ) for ( int k = 0; k < strings.size(); k++ )
{ {
if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0) if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
{ {
int rOff = out.getTotalSize();
int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE; int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE;
if (index < ExtSSTRecord.MAX_BUCKETS) { if (index < ExtSSTRecord.MAX_BUCKETS) {
//Excel only indexes the first 128 buckets. //Excel only indexes the first 128 buckets.
bucketAbsoluteOffsets[index] = pos-offset; bucketAbsoluteOffsets[index] = rOff;
bucketRelativeOffsets[index] = pos-offset; bucketRelativeOffsets[index] = rOff;
} }
} }
UnicodeString s = getUnicodeString(k); UnicodeString s = getUnicodeString(k);
pos += s.serialize(stats, pos, data); s.serialize(out);
} }
//Check to see if there is a hanging continue record length }
if (stats.lastLengthPos != -1) {
short lastRecordLength = (short)(pos - stats.lastLengthPos-2);
if (lastRecordLength > 8224)
throw new InternalError();
LittleEndian.putShort(data, stats.lastLengthPos, lastRecordLength);
}
return pos - offset;
}
private UnicodeString getUnicodeString( int index ) private UnicodeString getUnicodeString( int index )

View File

@ -17,19 +17,23 @@
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import org.apache.poi.util.LittleEndian; import org.apache.poi.hssf.record.cont.ContinuableRecord;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.StringUtil; import org.apache.poi.util.StringUtil;
/** /**
* Supports the STRING record structure. (0x0207) * STRING (0x0207)<p/>
*
* Stores the cached result of a text formula
* *
* @author Glen Stampoultzis (glens at apache.org) * @author Glen Stampoultzis (glens at apache.org)
*/ */
public class StringRecord extends Record { public final class StringRecord extends ContinuableRecord {
public final static short sid = 0x0207;
private int field_1_string_length; public final static short sid = 0x0207;
private byte field_2_unicode_flag;
private String field_3_string; private boolean _is16bitUnicode;
private String _text;
public StringRecord() public StringRecord()
@ -39,77 +43,24 @@ public class StringRecord extends Record {
/** /**
* @param in the RecordInputstream to read the record from * @param in the RecordInputstream to read the record from
*/ */
public StringRecord( RecordInputStream in) public StringRecord( RecordInputStream in) {
{ int field_1_string_length = in.readUShort();
field_1_string_length = in.readShort(); _is16bitUnicode = in.readByte() != 0x00;
field_2_unicode_flag = in.readByte();
byte[] data = in.readRemainder(); if (_is16bitUnicode){
//Why isn't this using the in.readString methods??? _text = in.readUnicodeLEString(field_1_string_length);
if (isUnCompressedUnicode()) } else {
{ _text = in.readCompressedUnicode(field_1_string_length);
field_3_string = StringUtil.getFromUnicodeLE(data, 0, field_1_string_length );
}
else
{
field_3_string = StringUtil.getFromCompressedUnicode(data, 0, field_1_string_length);
} }
} }
public void processContinueRecord(byte[] data) {
if(isUnCompressedUnicode()) { protected void serialize(ContinuableRecordOutput out) {
field_3_string += StringUtil.getFromUnicodeLE(data, 0, field_1_string_length - field_3_string.length()); out.writeShort(_text.length());
} else { out.writeStringData(_text);
field_3_string += StringUtil.getFromCompressedUnicode(data, 0, field_1_string_length - field_3_string.length());
}
} }
private int getStringByteLength()
{
return isUnCompressedUnicode() ? field_1_string_length * 2 : field_1_string_length;
}
protected int getDataSize() {
return 2 + 1 + getStringByteLength();
}
/**
* is this uncompressed unicode (16bit)? Or just 8-bit compressed?
* @return isUnicode - True for 16bit- false for 8bit
*/
public boolean isUnCompressedUnicode()
{
return (field_2_unicode_flag == 1);
}
/**
* called by the class that is responsible for writing this sucker.
* Subclasses should implement this so that their data is passed back in a
* byte array.
*
* @param offset to begin writing at
* @param data byte array containing instance data
* @return number of bytes written
*/
public int serialize( int offset, byte[] data )
{
LittleEndian.putUShort(data, 0 + offset, sid);
LittleEndian.putUShort(data, 2 + offset, 3 + getStringByteLength());
LittleEndian.putUShort(data, 4 + offset, field_1_string_length);
data[6 + offset] = field_2_unicode_flag;
if (isUnCompressedUnicode())
{
StringUtil.putUnicodeLE(field_3_string, data, 7 + offset);
}
else
{
StringUtil.putCompressedUnicode(field_3_string, data, 7 + offset);
}
return getRecordSize();
}
/**
* return the non static version of the id for this record.
*/
public short getSid() public short getSid()
{ {
return sid; return sid;
@ -120,26 +71,16 @@ public class StringRecord extends Record {
*/ */
public String getString() public String getString()
{ {
return field_3_string; return _text;
} }
/**
* Sets whether the string is compressed or not
* @param unicode_flag 1 = uncompressed, 0 = compressed
*/
public void setCompressedFlag( byte unicode_flag )
{
this.field_2_unicode_flag = unicode_flag;
}
/** /**
* Sets the string represented by this record. * Sets the string represented by this record.
*/ */
public void setString( String string ) public void setString(String string) {
{ _text = string;
this.field_1_string_length = string.length(); _is16bitUnicode = StringUtil.hasMultibyte(string);
this.field_3_string = string;
setCompressedFlag(StringUtil.hasMultibyte(string) ? (byte)1 : (byte)0);
} }
public String toString() public String toString()
@ -148,16 +89,15 @@ public class StringRecord extends Record {
buffer.append("[STRING]\n"); buffer.append("[STRING]\n");
buffer.append(" .string = ") buffer.append(" .string = ")
.append(field_3_string).append("\n"); .append(_text).append("\n");
buffer.append("[/STRING]\n"); buffer.append("[/STRING]\n");
return buffer.toString(); return buffer.toString();
} }
public Object clone() { public Object clone() {
StringRecord rec = new StringRecord(); StringRecord rec = new StringRecord();
rec.field_1_string_length = this.field_1_string_length; rec._is16bitUnicode= _is16bitUnicode;
rec.field_2_unicode_flag= this.field_2_unicode_flag; rec._text = _text;
rec.field_3_string = this.field_3_string;
return rec; return rec;
} }
} }

View File

@ -17,16 +17,13 @@
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import java.io.UnsupportedEncodingException; import org.apache.poi.hssf.record.cont.ContinuableRecord;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.hssf.record.formula.Ptg; import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.util.BitField; import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory; import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.HexDump; import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianOutput;
/** /**
* The TXO record (0x01B6) is used to define the properties of a text box. It is * The TXO record (0x01B6) is used to define the properties of a text box. It is
@ -36,7 +33,7 @@ import org.apache.poi.util.LittleEndianOutput;
* *
* @author Glen Stampoultzis (glens at apache.org) * @author Glen Stampoultzis (glens at apache.org)
*/ */
public final class TextObjectRecord extends Record { public final class TextObjectRecord extends ContinuableRecord {
public final static short sid = 0x01B6; public final static short sid = 0x01B6;
private static final int FORMAT_RUN_ENCODED_SIZE = 8; // 2 shorts and 4 bytes reserved private static final int FORMAT_RUN_ENCODED_SIZE = 8; // 2 shorts and 4 bytes reserved
@ -163,30 +160,7 @@ public final class TextObjectRecord extends Record {
return sid; return sid;
} }
/** private void serializeTXORecord(ContinuableRecordOutput out) {
* Only for the current record. does not include any subsequent Continue
* records
*/
private int getCurrentRecordDataSize() {
int result = 2 + 2 + 2 + 2 + 2 + 2 + 2 + 4;
if (_linkRefPtg != null) {
result += 2 // formula size
+ 4 // unknownInt
+_linkRefPtg.getSize();
if (_unknownPostFormulaByte != null) {
result += 1;
}
}
return result;
}
private int serializeTXORecord(int offset, byte[] data) {
int dataSize = getCurrentRecordDataSize();
int recSize = dataSize+4;
LittleEndianOutput out = new LittleEndianByteArrayOutputStream(data, offset, recSize);
out.writeShort(TextObjectRecord.sid);
out.writeShort(dataSize);
out.writeShort(field_1_options); out.writeShort(field_1_options);
out.writeShort(field_2_textOrientation); out.writeShort(field_2_textOrientation);
@ -206,79 +180,23 @@ public final class TextObjectRecord extends Record {
out.writeByte(_unknownPostFormulaByte.byteValue()); out.writeByte(_unknownPostFormulaByte.byteValue());
} }
} }
return recSize;
} }
private int serializeTrailingRecords(int offset, byte[] data) { private void serializeTrailingRecords(ContinuableRecordOutput out) {
byte[] textBytes; out.writeContinue();
try { out.writeStringData(_text.getString());
textBytes = _text.getString().getBytes("UTF-16LE"); out.writeContinue();
} catch (UnsupportedEncodingException e) { writeFormatData(out, _text);
throw new RuntimeException(e.getMessage(), e);
}
int remainingLength = textBytes.length;
int countTextBytesWritten = 0;
int pos = offset;
// (regardless what was read, we always serialize double-byte
// unicode characters (UTF-16LE).
Byte unicodeFlag = new Byte((byte)1);
while (remainingLength > 0) {
int chunkSize = Math.min(RecordInputStream.MAX_RECORD_DATA_SIZE - 2, remainingLength);
remainingLength -= chunkSize;
pos += ContinueRecord.write(data, pos, unicodeFlag, textBytes, countTextBytesWritten, chunkSize);
countTextBytesWritten += chunkSize;
}
byte[] formatData = createFormatData(_text);
pos += ContinueRecord.write(data, pos, null, formatData);
return pos - offset;
} }
private int getTrailingRecordsSize() { protected void serialize(ContinuableRecordOutput out) {
if (_text.length() < 1) {
return 0;
}
int encodedTextSize = 0;
int textBytesLength = _text.length() * LittleEndian.SHORT_SIZE;
while (textBytesLength > 0) {
int chunkSize = Math.min(RecordInputStream.MAX_RECORD_DATA_SIZE - 2, textBytesLength);
textBytesLength -= chunkSize;
encodedTextSize += 4; // +4 for ContinueRecord sid+size serializeTXORecord(out);
encodedTextSize += 1+chunkSize; // +1 for compressed unicode flag,
}
int encodedFormatSize = (_text.numFormattingRuns() + 1) * FORMAT_RUN_ENCODED_SIZE
+ 4; // +4 for ContinueRecord sid+size
return encodedTextSize + encodedFormatSize;
}
public int serialize(int offset, byte[] data) {
int expectedTotalSize = getRecordSize();
int totalSize = serializeTXORecord(offset, data);
if (_text.getString().length() > 0) { if (_text.getString().length() > 0) {
totalSize += serializeTrailingRecords(offset+totalSize, data); serializeTrailingRecords(out);
} }
if (totalSize != expectedTotalSize)
throw new RecordFormatException(totalSize
+ " bytes written but getRecordSize() reports " + expectedTotalSize);
return totalSize;
} }
/**
* Note - this total size includes all potential {@link ContinueRecord}s written
* but it is not the "ushort size" value to be written at the start of the first BIFF record
*/
protected int getDataSize() {
return getCurrentRecordDataSize() + getTrailingRecordsSize();
}
private int getFormattingDataLength() { private int getFormattingDataLength() {
if (_text.length() < 1) { if (_text.length() < 1) {
// important - no formatting data if text is empty // important - no formatting data if text is empty
@ -287,25 +205,17 @@ public final class TextObjectRecord extends Record {
return (_text.numFormattingRuns() + 1) * FORMAT_RUN_ENCODED_SIZE; return (_text.numFormattingRuns() + 1) * FORMAT_RUN_ENCODED_SIZE;
} }
private static byte[] createFormatData(HSSFRichTextString str) { private static void writeFormatData(ContinuableRecordOutput out , HSSFRichTextString str) {
int nRuns = str.numFormattingRuns(); int nRuns = str.numFormattingRuns();
byte[] result = new byte[(nRuns + 1) * FORMAT_RUN_ENCODED_SIZE];
int pos = 0;
for (int i = 0; i < nRuns; i++) { for (int i = 0; i < nRuns; i++) {
LittleEndian.putUShort(result, pos, str.getIndexOfFormattingRun(i)); out.writeShort(str.getIndexOfFormattingRun(i));
pos += 2;
int fontIndex = str.getFontOfFormattingRun(i); int fontIndex = str.getFontOfFormattingRun(i);
LittleEndian.putUShort(result, pos, fontIndex == str.NO_FONT ? 0 : fontIndex); out.writeShort(fontIndex == str.NO_FONT ? 0 : fontIndex);
pos += 2; out.writeInt(0); // skip reserved
pos += 4; // skip reserved
} }
LittleEndian.putUShort(result, pos, str.length()); out.writeShort(str.length());
pos += 2; out.writeShort(0);
LittleEndian.putUShort(result, pos, 0); out.writeInt(0); // skip reserved
pos += 2;
pos += 4; // skip reserved
return result;
} }
/** /**

View File

@ -17,75 +17,84 @@
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.HexDump;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianOutput;
/** /**
* Title: Unicode String<P> * Title: Unicode String<p/>
* Description: Unicode String record. We implement these as a record, although * Description: Unicode String - just standard fields that are in several records.
* they are really just standard fields that are in several records. * It is considered more desirable then repeating it in all of them.<p/>
* It is considered more desirable then repeating it in all of them.<P> * REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<p/>
* REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<P>
* @author Andrew C. Oliver * @author Andrew C. Oliver
* @author Marc Johnson (mjohnson at apache dot org) * @author Marc Johnson (mjohnson at apache dot org)
* @author Glen Stampoultzis (glens at apache.org) * @author Glen Stampoultzis (glens at apache.org)
*/ */
public final class UnicodeString implements Comparable { public final class UnicodeString implements Comparable {
private short field_1_charCount; // = 0; private short field_1_charCount;
private byte field_2_optionflags; // = 0; private byte field_2_optionflags;
private String field_3_string; // = null; private String field_3_string;
private List field_4_format_runs; private List field_4_format_runs;
private byte[] field_5_ext_rst; private byte[] field_5_ext_rst;
private static final BitField highByte = BitFieldFactory.getInstance(0x1); private static final BitField highByte = BitFieldFactory.getInstance(0x1);
private static final BitField extBit = BitFieldFactory.getInstance(0x4); private static final BitField extBit = BitFieldFactory.getInstance(0x4);
private static final BitField richText = BitFieldFactory.getInstance(0x8); private static final BitField richText = BitFieldFactory.getInstance(0x8);
public static class FormatRun implements Comparable { public static class FormatRun implements Comparable {
short character; short character;
short fontIndex; short fontIndex;
public FormatRun(short character, short fontIndex) { public FormatRun(short character, short fontIndex) {
this.character = character; this.character = character;
this.fontIndex = fontIndex; this.fontIndex = fontIndex;
}
public short getCharacterPos() {
return character;
}
public short getFontIndex() {
return fontIndex;
}
public boolean equals(Object o) {
if ((o == null) || (o.getClass() != this.getClass()))
{
return false;
} }
FormatRun other = ( FormatRun ) o;
return ((character == other.character) && (fontIndex == other.fontIndex)); public FormatRun(LittleEndianInput in) {
} this(in.readShort(), in.readShort());
}
public int compareTo(Object obj) { public short getCharacterPos() {
FormatRun r = (FormatRun)obj; return character;
if ((character == r.character) && (fontIndex == r.fontIndex)) }
return 0;
if (character == r.character)
return fontIndex - r.fontIndex;
else return character - r.character;
}
public String toString() { public short getFontIndex() {
return "character="+character+",fontIndex="+fontIndex; return fontIndex;
} }
public boolean equals(Object o) {
if (!(o instanceof FormatRun)) {
return false;
}
FormatRun other = ( FormatRun ) o;
return character == other.character && fontIndex == other.fontIndex;
}
public int compareTo(Object obj) {
FormatRun r = (FormatRun)obj;
if ((character == r.character) && (fontIndex == r.fontIndex))
return 0;
if (character == r.character)
return fontIndex - r.fontIndex;
else return character - r.character;
}
public String toString() {
return "character="+character+",fontIndex="+fontIndex;
}
public void serialize(LittleEndianOutput out) {
out.writeShort(character);
out.writeShort(fontIndex);
}
} }
private UnicodeString() { private UnicodeString() {
@ -116,13 +125,12 @@ public final class UnicodeString implements Comparable {
*/ */
public boolean equals(Object o) public boolean equals(Object o)
{ {
if ((o == null) || (o.getClass() != this.getClass())) if (!(o instanceof UnicodeString)) {
{
return false; return false;
} }
UnicodeString other = ( UnicodeString ) o; UnicodeString other = (UnicodeString) o;
//Ok lets do this in stages to return a quickly, first check the actual string //OK lets do this in stages to return a quickly, first check the actual string
boolean eq = ((field_1_charCount == other.field_1_charCount) boolean eq = ((field_1_charCount == other.field_1_charCount)
&& (field_2_optionflags == other.field_2_optionflags) && (field_2_optionflags == other.field_2_optionflags)
&& field_3_string.equals(other.field_3_string)); && field_3_string.equals(other.field_3_string));
@ -148,7 +156,7 @@ public final class UnicodeString implements Comparable {
if (!run1.equals(run2)) if (!run1.equals(run2))
return false; return false;
} }
//Well the format runs are equal as well!, better check the ExtRst data //Well the format runs are equal as well!, better check the ExtRst data
//Which by the way we dont know how to decode! //Which by the way we dont know how to decode!
@ -194,19 +202,17 @@ public final class UnicodeString implements Comparable {
boolean isCompressed = ((field_2_optionflags & 1) == 0); boolean isCompressed = ((field_2_optionflags & 1) == 0);
if (isCompressed) { if (isCompressed) {
field_3_string = in.readCompressedUnicode(field_1_charCount); field_3_string = in.readCompressedUnicode(field_1_charCount);
} else { } else {
field_3_string = in.readUnicodeLEString(field_1_charCount); field_3_string = in.readUnicodeLEString(field_1_charCount);
} }
if (isRichText() && (runCount > 0)) { if (isRichText() && (runCount > 0)) {
field_4_format_runs = new ArrayList(runCount); field_4_format_runs = new ArrayList(runCount);
for (int i=0;i<runCount;i++) { for (int i=0;i<runCount;i++) {
field_4_format_runs.add(new FormatRun(in.readShort(), in.readShort())); field_4_format_runs.add(new FormatRun(in));
//read reserved }
//in.readInt();
}
} }
if (isExtendedText() && (extensionLength > 0)) { if (isExtendedText() && (extensionLength > 0)) {
@ -372,11 +378,8 @@ public final class UnicodeString implements Comparable {
field_2_optionflags = richText.clearByte(field_2_optionflags); field_2_optionflags = richText.clearByte(field_2_optionflags);
} }
public byte[] getExtendedRst() {
return this.field_5_ext_rst;
}
public void setExtendedRst(byte[] ext_rst) { void setExtendedRst(byte[] ext_rst) {
if (ext_rst != null) if (ext_rst != null)
field_2_optionflags = extBit.setByte(field_2_optionflags); field_2_optionflags = extBit.setByte(field_2_optionflags);
else field_2_optionflags = extBit.clearByte(field_2_optionflags); else field_2_optionflags = extBit.clearByte(field_2_optionflags);
@ -391,13 +394,13 @@ public final class UnicodeString implements Comparable {
* removed / re-ordered * removed / re-ordered
*/ */
public void swapFontUse(short oldFontIndex, short newFontIndex) { public void swapFontUse(short oldFontIndex, short newFontIndex) {
Iterator i = field_4_format_runs.iterator(); Iterator i = field_4_format_runs.iterator();
while(i.hasNext()) { while(i.hasNext()) {
FormatRun run = (FormatRun)i.next(); FormatRun run = (FormatRun)i.next();
if(run.fontIndex == oldFontIndex) { if(run.fontIndex == oldFontIndex) {
run.fontIndex = newFontIndex; run.fontIndex = newFontIndex;
} }
} }
} }
/** /**
@ -442,353 +445,45 @@ public final class UnicodeString implements Comparable {
return buffer.toString(); return buffer.toString();
} }
private int writeContinueIfRequired(UnicodeRecordStats stats, final int requiredSize, int offset, byte[] data) { public void serialize(ContinuableRecordOutput out) {
//Basic string overhead int numberOfRichTextRuns = 0;
if (stats.remainingSize < requiredSize) { int extendedDataSize = 0;
//Check if be are already in a continue record, if so make sure that if (isRichText() && field_4_format_runs != null) {
//we go back and write out our length numberOfRichTextRuns = field_4_format_runs.size();
if (stats.lastLengthPos != -1) { }
short lastRecordLength = (short)(offset - stats.lastLengthPos - 2); if (isExtendedText() && field_5_ext_rst != null) {
if (lastRecordLength > 8224) extendedDataSize = field_5_ext_rst.length;
throw new InternalError(); }
LittleEndian.putShort(data, stats.lastLengthPos, lastRecordLength);
out.writeString(field_3_string, numberOfRichTextRuns, extendedDataSize);
if (numberOfRichTextRuns > 0) {
//This will ensure that a run does not split a continue
for (int i=0;i<numberOfRichTextRuns;i++) {
if (out.getAvailableSpace() < 4) {
out.writeContinue();
}
FormatRun r = (FormatRun)field_4_format_runs.get(i);
r.serialize(out);
}
} }
LittleEndian.putShort(data, offset, ContinueRecord.sid); if (extendedDataSize > 0) {
offset+=2; // OK ExtRst is actually not documented, so i am going to hope
//Record the location of the last continue length position, but don't write // that we can actually continue on byte boundaries
//anything there yet (since we don't know what it will be!)
stats.lastLengthPos = offset;
offset += 2;
stats.recordSize += 4; int extPos = 0;
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4; while (true) {
} int nBytesToWrite = Math.min(extendedDataSize - extPos, out.getAvailableSpace());
return offset; out.write(field_5_ext_rst, extPos, nBytesToWrite);
} extPos += nBytesToWrite;
if (extPos >= extendedDataSize) {
public int serialize(UnicodeRecordStats stats, final int offset, byte [] data) break;
{ }
int pos = offset; out.writeContinue();
//Basic string overhead
pos = writeContinueIfRequired(stats, 3, pos, data);
LittleEndian.putShort(data, pos, getCharCount());
pos += 2;
data[ pos ] = getOptionFlags();
pos += 1;
stats.recordSize += 3;
stats.remainingSize-= 3;
if (isRichText()) {
if (field_4_format_runs != null) {
pos = writeContinueIfRequired(stats, 2, pos, data);
LittleEndian.putShort(data, pos, (short) field_4_format_runs.size());
pos += 2;
stats.recordSize += 2;
stats.remainingSize -= 2;
}
}
if ( isExtendedText() )
{
if (this.field_5_ext_rst != null) {
pos = writeContinueIfRequired(stats, 4, pos, data);
LittleEndian.putInt(data, pos, field_5_ext_rst.length);
pos += 4;
stats.recordSize += 4;
stats.remainingSize -= 4;
}
}
int charsize = isUncompressedUnicode() ? 2 : 1;
int strSize = (getString().length() * charsize);
byte[] strBytes = null;
try {
String unicodeString = getString();
if (!isUncompressedUnicode())
{
strBytes = unicodeString.getBytes("ISO-8859-1");
}
else
{
strBytes = unicodeString.getBytes("UTF-16LE");
} }
} }
catch (Exception e) {
throw new InternalError();
}
if (strSize != strBytes.length)
throw new InternalError("That shouldnt have happened!");
//Check to see if the offset occurs mid string, if so then we need to add
//the byte to start with that represents the first byte of the continue record.
if (strSize > stats.remainingSize) {
//OK the offset occurs half way through the string, that means that
//we need an extra byte after the continue record ie we didnt finish
//writing out the string the 1st time through
//But hang on, how many continue records did we span? What if this is
//a REALLY long string. We need to work this all out.
int amountThatCantFit = strSize;
int strPos = 0;
while (amountThatCantFit > 0) {
int amountWritten = Math.min(stats.remainingSize, amountThatCantFit);
//Make sure that the amount that can't fit takes into account
//whether we are writing double byte unicode
if (isUncompressedUnicode()) {
//We have the '-1' here because whether this is the first record or
//subsequent continue records, there is always the case that the
//number of bytes in a string on double byte boundaries is actually odd.
if ( ( (amountWritten ) % 2) == 1)
amountWritten--;
}
System.arraycopy(strBytes, strPos, data, pos, amountWritten);
pos += amountWritten;
strPos += amountWritten;
stats.recordSize += amountWritten;
stats.remainingSize -= amountWritten;
//Ok lets subtract what we can write
amountThatCantFit -= amountWritten;
//Each iteration of this while loop is another continue record, unless
//everything now fits.
if (amountThatCantFit > 0) {
//We know that a continue WILL be requied, but use this common method
pos = writeContinueIfRequired(stats, amountThatCantFit, pos, data);
//The first byte after a continue mid string is the extra byte to
//indicate if this run is compressed or not.
data[pos] = (byte) (isUncompressedUnicode() ? 0x1 : 0x0);
pos++;
stats.recordSize++;
stats.remainingSize --;
}
}
} else {
if (strSize > (data.length-pos))
System.out.println("Hmm shouldnt happen");
//Ok the string fits nicely in the remaining size
System.arraycopy(strBytes, 0, data, pos, strSize);
pos += strSize;
stats.recordSize += strSize;
stats.remainingSize -= strSize;
}
if (isRichText() && (field_4_format_runs != null)) {
int count = field_4_format_runs.size();
//This will ensure that a run does not split a continue
for (int i=0;i<count;i++) {
pos = writeContinueIfRequired(stats, 4, pos, data);
FormatRun r = (FormatRun)field_4_format_runs.get(i);
LittleEndian.putShort(data, pos, r.character);
pos += 2;
LittleEndian.putShort(data, pos, r.fontIndex);
pos += 2;
//Each run count is four bytes
stats.recordSize += 4;
stats.remainingSize -=4;
}
}
if (isExtendedText() && (field_5_ext_rst != null)) {
//Ok ExtRst is actually not documented, so i am going to hope
//that we can actually continue on byte boundaries
int ammountThatCantFit = field_5_ext_rst.length - stats.remainingSize;
int extPos = 0;
if (ammountThatCantFit > 0) {
while (ammountThatCantFit > 0) {
//So for this record we have already written
int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
System.arraycopy(field_5_ext_rst, extPos, data, pos, ammountWritten);
pos += ammountWritten;
extPos += ammountWritten;
stats.recordSize += ammountWritten;
stats.remainingSize -= ammountWritten;
//Ok lets subtract what we can write
ammountThatCantFit -= ammountWritten;
if (ammountThatCantFit > 0) {
pos = writeContinueIfRequired(stats, 1, pos, data);
}
}
} else {
//We can fit wholey in what remains.
System.arraycopy(field_5_ext_rst, 0, data, pos, field_5_ext_rst.length);
pos += field_5_ext_rst.length;
stats.remainingSize -= field_5_ext_rst.length;
stats.recordSize += field_5_ext_rst.length;
}
}
return pos - offset;
}
public void setCompressedUnicode() {
field_2_optionflags = highByte.setByte(field_2_optionflags);
}
public void setUncompressedUnicode() {
field_2_optionflags = highByte.clearByte(field_2_optionflags);
}
private boolean isUncompressedUnicode()
{
return highByte.isSet(getOptionFlags());
}
/** Returns the size of this record, given the amount of record space
* remaining, it will also include the size of writing a continue record.
*/
public static class UnicodeRecordStats {
public int recordSize;
public int remainingSize = SSTRecord.MAX_RECORD_SIZE;
public int lastLengthPos = -1;
}
public void getRecordSize(UnicodeRecordStats stats) {
//Basic string overhead
if (stats.remainingSize < 3) {
//Needs a continue
stats.recordSize += 4;
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
}
stats.recordSize += 3;
stats.remainingSize-= 3;
//Read the number of rich runs if rich text.
if ( isRichText() )
{
//Run count
if (stats.remainingSize < 2) {
//Needs a continue
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize+=4;
}
stats.recordSize += 2;
stats.remainingSize -=2;
}
//Read the size of extended data if present.
if ( isExtendedText() )
{
//Needs a continue
//extension length
if (stats.remainingSize < 4) {
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize+=4;
}
stats.recordSize += 4;
stats.remainingSize -=4;
}
int charsize = isUncompressedUnicode() ? 2 : 1;
int strSize = (getString().length() * charsize);
//Check to see if the offset occurs mid string, if so then we need to add
//the byte to start with that represents the first byte of the continue record.
if (strSize > stats.remainingSize) {
//Ok the offset occurs half way through the string, that means that
//we need an extra byte after the continue record ie we didnt finish
//writing out the string the 1st time through
//But hang on, how many continue records did we span? What if this is
//a REALLY long string. We need to work this all out.
int ammountThatCantFit = strSize;
while (ammountThatCantFit > 0) {
int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
//Make sure that the ammount that cant fit takes into account
//whether we are writing double byte unicode
if (isUncompressedUnicode()) {
//We have the '-1' here because whether this is the first record or
//subsequent continue records, there is always the case that the
//number of bytes in a string on doube byte boundaries is actually odd.
if ( ( (ammountWritten) % 2) == 1)
ammountWritten--;
}
stats.recordSize += ammountWritten;
stats.remainingSize -= ammountWritten;
//Ok lets subtract what we can write
ammountThatCantFit -= ammountWritten;
//Each iteration of this while loop is another continue record, unless
//everything now fits.
if (ammountThatCantFit > 0) {
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize+=4;
//The first byte after a continue mid string is the extra byte to
//indicate if this run is compressed or not.
stats.recordSize++;
stats.remainingSize --;
}
}
} else {
//Ok the string fits nicely in the remaining size
stats.recordSize += strSize;
stats.remainingSize -= strSize;
}
if (isRichText() && (field_4_format_runs != null)) {
int count = field_4_format_runs.size();
//This will ensure that a run does not split a continue
for (int i=0;i<count;i++) {
if (stats.remainingSize < 4) {
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize+=4;
}
//Each run count is four bytes
stats.recordSize += 4;
stats.remainingSize -=4;
}
}
if (isExtendedText() && (field_5_ext_rst != null)) {
//Ok ExtRst is actually not documented, so i am going to hope
//that we can actually continue on byte boundaries
int ammountThatCantFit = field_5_ext_rst.length - stats.remainingSize;
if (ammountThatCantFit > 0) {
while (ammountThatCantFit > 0) {
//So for this record we have already written
int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
stats.recordSize += ammountWritten;
stats.remainingSize -= ammountWritten;
//Ok lets subtract what we can write
ammountThatCantFit -= ammountWritten;
if (ammountThatCantFit > 0) {
//Each iteration of this while loop is another continue record.
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize += 4;
}
}
} else {
//We can fit wholey in what remains.
stats.remainingSize -= field_5_ext_rst.length;
stats.recordSize += field_5_ext_rst.length;
}
}
} }
public int compareTo(Object obj) public int compareTo(Object obj)
@ -801,9 +496,9 @@ public final class UnicodeString implements Comparable {
if (result != 0) if (result != 0)
return result; return result;
//Ok string appears to be equal but now lets compare formatting runs //OK string appears to be equal but now lets compare formatting runs
if ((field_4_format_runs == null) && (str.field_4_format_runs == null)) if ((field_4_format_runs == null) && (str.field_4_format_runs == null))
//Strings are equal, and there are no formtting runs. //Strings are equal, and there are no formatting runs.
return 0; return 0;
if ((field_4_format_runs == null) && (str.field_4_format_runs != null)) if ((field_4_format_runs == null) && (str.field_4_format_runs != null))
@ -850,12 +545,12 @@ public final class UnicodeString implements Comparable {
return 0; return 0;
} }
public boolean isRichText() private boolean isRichText()
{ {
return richText.isSet(getOptionFlags()); return richText.isSet(getOptionFlags());
} }
public boolean isExtendedText() private boolean isExtendedText()
{ {
return extBit.isSet(getOptionFlags()); return extBit.isSet(getOptionFlags());
} }
@ -877,10 +572,8 @@ public final class UnicodeString implements Comparable {
str.field_5_ext_rst = new byte[field_5_ext_rst.length]; str.field_5_ext_rst = new byte[field_5_ext_rst.length];
System.arraycopy(field_5_ext_rst, 0, str.field_5_ext_rst, 0, System.arraycopy(field_5_ext_rst, 0, str.field_5_ext_rst, 0,
field_5_ext_rst.length); field_5_ext_rst.length);
} }
return str; return str;
} }
} }

View File

@ -0,0 +1,69 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.cont;
import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianOutput;
/**
* Common superclass of all records that can produce {@link ContinueRecord}s while being serialized.
*
* @author Josh Micich
*/
public abstract class ContinuableRecord extends Record {

    /** Size in bytes of a BIFF record header (ushort sid + ushort size). */
    private static final int HEADER_SIZE = 4;

    protected ContinuableRecord() {
        // this base class holds no state
    }

    /**
     * Writes the data portion of this record to <tt>out</tt>.<br/>
     * The BIFF header (ushort sid, ushort size) is not written here; the
     * {@link ContinuableRecordOutput} passed in takes care of it. Plain values can be
     * written through the ordinary {@link LittleEndianOutput} methods, strings through the
     * dedicated methods of {@link ContinuableRecordOutput} (which start
     * {@link ContinueRecord}s when required). An implementation may also force a new
     * {@link ContinueRecord} explicitly, whatever space is left in the current record.
     *
     * @param out the output to receive this record's data
     */
    protected abstract void serialize(ContinuableRecordOutput out);

    /**
     * Determines the encoded size by running {@link #serialize(ContinuableRecordOutput)}
     * against a counting-only output.
     *
     * @return four less than the total length of the encoded record(s). When no
     *         {@link ContinueRecord} is needed, this is the same ushort value that gets
     *         encoded after the record sid.
     */
    protected final int getDataSize() {
        ContinuableRecordOutput sizer = ContinuableRecordOutput.createForCountingOnly();
        serialize(sizer);
        sizer.terminate();
        int totalEncodedSize = sizer.getTotalSize();
        return totalEncodedSize - HEADER_SIZE;
    }

    /**
     * Serializes this record (and any {@link ContinueRecord}s it produces) into
     * <tt>data</tt>, starting at <tt>offset</tt>.
     *
     * @param offset index in <tt>data</tt> at which writing starts
     * @param data destination byte array
     * @return the total number of bytes written, including all BIFF headers
     */
    public final int serialize(int offset, byte[] data) {
        LittleEndianOutput target = new LittleEndianByteArrayOutputStream(data, offset);
        ContinuableRecordOutput recordOut = new ContinuableRecordOutput(target, getSid());
        serialize(recordOut);
        recordOut.terminate();
        return recordOut.getTotalSize();
    }
}

View File

@ -0,0 +1,257 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.cont;
import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.util.DelayableLittleEndianOutput;
import org.apache.poi.util.LittleEndianOutput;
import org.apache.poi.util.StringUtil;
/**
* An augmented {@link LittleEndianOutput} used for serialization of {@link ContinuableRecord}s.
* This class keeps track of how much remaining space is available in the current BIFF record and
* can start new {@link ContinueRecord}s as required.
*
* @author Josh Micich
*/
public final class ContinuableRecordOutput implements LittleEndianOutput {

    /** The underlying output that all records (main and continue) are written to. */
    private final LittleEndianOutput _out;
    /** Output for the record currently being written. */
    private UnknownLengthRecordOutput _ulrOutput;
    /** Combined size (headers included) of all records that have already been terminated. */
    private int _totalPreviousRecordsSize;

    /**
     * Starts a new record with the given sid on <tt>out</tt>.
     *
     * @param out the underlying output
     * @param sid sid of the first (main) record
     */
    ContinuableRecordOutput(LittleEndianOutput out, int sid) {
        _ulrOutput = new UnknownLengthRecordOutput(out, sid);
        _out = out;
        _totalPreviousRecordsSize = 0;
    }

    /**
     * @return an instance which discards all data and is only useful for finding out
     *         how many bytes a serialization would produce (see {@link #getTotalSize()})
     */
    public static ContinuableRecordOutput createForCountingOnly() {
        return new ContinuableRecordOutput(NOP_OUTPUT, -777); // fake sid
    }

    /**
     * @return total number of bytes written so far (including all BIFF headers)
     */
    public int getTotalSize() {
        return _totalPreviousRecordsSize + _ulrOutput.getTotalSize();
    }

    /**
     * Terminates the last record (also updates its 'ushort size' field)
     */
    void terminate() {
        _ulrOutput.terminate();
    }

    /**
     * @return number of remaining bytes of space in current record
     */
    public int getAvailableSpace() {
        return _ulrOutput.getAvailableSpace();
    }

    /**
     * Terminates the current record and starts a new {@link ContinueRecord} (regardless
     * of how much space is still available in the current record).
     */
    public void writeContinue() {
        _ulrOutput.terminate();
        _totalPreviousRecordsSize += _ulrOutput.getTotalSize();
        _ulrOutput = new UnknownLengthRecordOutput(_out, ContinueRecord.sid);
    }

    /**
     * Starts a new {@link ContinueRecord} if the current record has less than
     * <tt>requiredContinuousSize</tt> bytes of space left.
     *
     * @param requiredContinuousSize number of bytes that must end up in the same record
     */
    public void writeContinueIfRequired(int requiredContinuousSize) {
        if (_ulrOutput.getAvailableSpace() < requiredContinuousSize) {
            writeContinue();
        }
    }

    /**
     * Writes the 'optionFlags' byte and encoded character data of a unicode string. This includes:
     * <ul>
     * <li>byte optionFlags</li>
     * <li>encoded character data (in "ISO-8859-1" or "UTF-16LE" encoding)</li>
     * </ul>
     *
     * Notes:
     * <ul>
     * <li>The value of the 'is16bitEncoded' flag is determined by the actual character data
     * of <tt>text</tt></li>
     * <li>The string options flag is never separated (by a {@link ContinueRecord}) from the
     * first chunk of character data it refers to.</li>
     * <li>The 'ushort length' field is assumed to have been explicitly written earlier. Hence,
     * there may be an intervening {@link ContinueRecord}</li>
     * </ul>
     *
     * @param text the string whose flags and characters are written
     */
    public void writeStringData(String text) {
        boolean is16bitEncoded = StringUtil.hasMultibyte(text);
        // calculate total size of the option flags and first encoded char
        int keepTogetherSize = 1 + 1; // byte optionFlags, at least one character byte
        int optionFlags = 0x00;
        if (is16bitEncoded) {
            optionFlags |= 0x01;
            keepTogetherSize += 1; // one extra byte for first char
        }
        writeContinueIfRequired(keepTogetherSize);
        writeByte(optionFlags);
        writeCharacterData(text, is16bitEncoded);
    }

    /**
     * Writes a unicode string complete with header and character data. This includes:
     * <ul>
     * <li>ushort length</li>
     * <li>byte optionFlags</li>
     * <li>ushort numberOfRichTextRuns (optional)</li>
     * <li>int extendedDataSize (optional)</li>
     * <li>encoded character data (in "ISO-8859-1" or "UTF-16LE" encoding)</li>
     * </ul>
     *
     * The following bits of the 'optionFlags' byte will be set as appropriate:
     * <table border='1'>
     * <tr><th>Mask</th><th>Description</th></tr>
     * <tr><td>0x01</td><td>is16bitEncoded</td></tr>
     * <tr><td>0x04</td><td>hasExtendedData</td></tr>
     * <tr><td>0x08</td><td>isRichText</td></tr>
     * </table>
     * Notes:
     * <ul>
     * <li>The value of the 'is16bitEncoded' flag is determined by the actual character data
     * of <tt>text</tt></li>
     * <li>The string header fields are never separated (by a {@link ContinueRecord}) from the
     * first chunk of character data (i.e. the first character is always encoded in the same
     * record as the string header).</li>
     * </ul>
     *
     * @param text the string to write
     * @param numberOfRichTextRuns written (and flagged) only when greater than zero
     * @param extendedDataSize written (and flagged) only when greater than zero
     */
    public void writeString(String text, int numberOfRichTextRuns, int extendedDataSize) {
        boolean is16bitEncoded = StringUtil.hasMultibyte(text);
        // calculate total size of the header and first encoded char
        int keepTogetherSize = 2 + 1 + 1; // ushort len, byte optionFlags, at least one character byte
        int optionFlags = 0x00;
        if (is16bitEncoded) {
            optionFlags |= 0x01;
            keepTogetherSize += 1; // one extra byte for first char
        }
        if (numberOfRichTextRuns > 0) {
            optionFlags |= 0x08;
            keepTogetherSize += 2; // ushort numberOfRichTextRuns
        }
        if (extendedDataSize > 0) {
            optionFlags |= 0x04;
            keepTogetherSize += 4; // int extendedDataSize
        }
        writeContinueIfRequired(keepTogetherSize);
        writeShort(text.length());
        writeByte(optionFlags);
        if (numberOfRichTextRuns > 0) {
            writeShort(numberOfRichTextRuns);
        }
        if (extendedDataSize > 0) {
            writeInt(extendedDataSize);
        }
        writeCharacterData(text, is16bitEncoded);
    }

    /**
     * Writes the characters of <tt>text</tt>, starting a new {@link ContinueRecord} each time
     * the current record is full. A character is never split across two records, and every
     * continue record started here begins with a one byte encoding flag (0x01 for 16 bit,
     * 0x00 for 8 bit).
     */
    private void writeCharacterData(String text, boolean is16bitEncoded) {
        int nChars = text.length();
        int bytesPerChar = is16bitEncoded ? 2 : 1;
        int continuationFlag = is16bitEncoded ? 0x01 : 0x00;
        int i = 0;
        while (true) {
            // only whole characters are written to the current record
            int nWritableChars = Math.min(nChars - i, _ulrOutput.getAvailableSpace() / bytesPerChar);
            for ( ; nWritableChars > 0; nWritableChars--) {
                char ch = text.charAt(i++);
                if (is16bitEncoded) {
                    _ulrOutput.writeShort(ch);
                } else {
                    _ulrOutput.writeByte(ch);
                }
            }
            if (i >= nChars) {
                break;
            }
            writeContinue();
            writeByte(continuationFlag);
        }
    }

    /**
     * Writes all bytes of <tt>b</tt>. See {@link #write(byte[], int, int)} for how the data
     * is distributed over {@link ContinueRecord}s.
     */
    public void write(byte[] b) {
        write(b, 0, b.length);
    }

    /**
     * Writes <tt>len</tt> bytes of <tt>b</tt> starting at <tt>offset</tt>.<br/>
     * If the data fits in the current record it is written there. Otherwise a new
     * {@link ContinueRecord} is started first, so that data which fits in a single record
     * is never split. Data that is larger than a whole record is spread over as many
     * {@link ContinueRecord}s as needed (split on byte boundaries) instead of overrunning
     * the record.
     */
    public void write(byte[] b, int offset, int len) {
        if (len <= _ulrOutput.getAvailableSpace()) {
            _ulrOutput.write(b, offset, len);
            return;
        }
        writeContinue();
        int pos = offset;
        int remaining = len;
        while (true) {
            // a freshly started record always has space, so every pass makes progress
            int chunkSize = Math.min(remaining, _ulrOutput.getAvailableSpace());
            _ulrOutput.write(b, pos, chunkSize);
            pos += chunkSize;
            remaining -= chunkSize;
            if (remaining <= 0) {
                break;
            }
            writeContinue();
        }
    }

    public void writeByte(int v) {
        writeContinueIfRequired(1);
        _ulrOutput.writeByte(v);
    }

    public void writeDouble(double v) {
        writeContinueIfRequired(8);
        _ulrOutput.writeDouble(v);
    }

    public void writeInt(int v) {
        writeContinueIfRequired(4);
        _ulrOutput.writeInt(v);
    }

    public void writeLong(long v) {
        writeContinueIfRequired(8);
        _ulrOutput.writeLong(v);
    }

    public void writeShort(int v) {
        writeContinueIfRequired(2);
        _ulrOutput.writeShort(v);
    }

    /**
     * Allows optimised usage of {@link ContinuableRecordOutput} for sizing purposes only.
     * Every write is discarded, and the 'delayed' output is this same object.
     */
    private static final LittleEndianOutput NOP_OUTPUT = new DelayableLittleEndianOutput() {

        public LittleEndianOutput createDelayedOutput(int size) {
            return this;
        }
        public void write(byte[] b) {
            // does nothing
        }
        public void write(byte[] b, int offset, int len) {
            // does nothing
        }
        public void writeByte(int v) {
            // does nothing
        }
        public void writeDouble(double v) {
            // does nothing
        }
        public void writeInt(int v) {
            // does nothing
        }
        public void writeLong(long v) {
            // does nothing
        }
        public void writeShort(int v) {
            // does nothing
        }
    };
}

View File

@ -0,0 +1,114 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.cont;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.util.DelayableLittleEndianOutput;
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianOutput;
/**
* Allows the writing of BIFF records when the 'ushort size' header field is not known in advance.
* When the client is finished writing data, it calls {@link #terminate()}, at which point this
* class updates the 'ushort size' with its final value.
*
* @author Josh Micich
*/
final class UnknownLengthRecordOutput implements LittleEndianOutput {
    /** Maximum number of data bytes (header excluded) that a single record may hold. */
    private static final int MAX_DATA_SIZE = RecordInputStream.MAX_RECORD_DATA_SIZE;

    private final LittleEndianOutput _originalOut;
    /** for writing the 'ushort size' field once its value is known */
    private final LittleEndianOutput _dataSizeOutput;
    /** temporary store for the record data; <code>null</code> when data is written directly */
    private final byte[] _byteBuffer;
    /** current destination for record data; set to <code>null</code> upon termination */
    private LittleEndianOutput _out;
    /** number of data bytes written so far */
    private int _size;

    /**
     * Writes the record sid to <tt>out</tt> straight away and arranges for the size field
     * to be filled in when {@link #terminate()} is called.
     *
     * @param out the output receiving the encoded record
     * @param sid the record sid
     */
    public UnknownLengthRecordOutput(LittleEndianOutput out, int sid) {
        _originalOut = out;
        out.writeShort(sid);
        if (!(out instanceof DelayableLittleEndianOutput)) {
            // size cannot be written out of order, so hold back all subsequent data in a buffer
            _dataSizeOutput = out;
            _byteBuffer = new byte[MAX_DATA_SIZE];
            _out = new LittleEndianByteArrayOutputStream(_byteBuffer, 0);
        } else {
            // optimisation - reserve the two size bytes now and write the data directly
            _dataSizeOutput = ((DelayableLittleEndianOutput) out).createDelayedOutput(2);
            _byteBuffer = null;
            _out = out;
        }
    }

    /**
     * includes 4 byte header
     */
    public int getTotalSize() {
        return 4 + _size;
    }

    /**
     * @return number of data bytes that can still be written to this record
     * @throws IllegalStateException if this record has already been terminated
     */
    public int getAvailableSpace() {
        checkNotTerminated();
        return MAX_DATA_SIZE - _size;
    }

    /**
     * Finishes writing the current record and updates 'ushort size' field.<br/>
     * After this method is called, only {@link #getTotalSize()} may be called.
     *
     * @throws IllegalStateException if this record has already been terminated
     */
    public void terminate() {
        checkNotTerminated();
        _dataSizeOutput.writeShort(_size);
        if (_byteBuffer != null) {
            // the held back data follows the size field
            _originalOut.write(_byteBuffer, 0, _size);
        }
        _out = null;
    }

    private void checkNotTerminated() {
        if (_out == null) {
            throw new IllegalStateException("Record already terminated");
        }
    }

    public void write(byte[] b) {
        _out.write(b);
        _size += b.length;
    }

    public void write(byte[] b, int offset, int len) {
        _out.write(b, offset, len);
        _size += len;
    }

    public void writeByte(int v) {
        _out.writeByte(v);
        _size += 1;
    }

    public void writeDouble(double v) {
        _out.writeDouble(v);
        _size += 8;
    }

    public void writeInt(int v) {
        _out.writeInt(v);
        _size += 4;
    }

    public void writeLong(long v) {
        _out.writeLong(v);
        _size += 8;
    }

    public void writeShort(int v) {
        _out.writeShort(v);
        _size += 2;
    }
}

View File

@ -0,0 +1,34 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.util;
/**
* Implementors of this interface allow client code to 'delay' writing to a certain section of a
* data output stream.<br/>
* A typical application is for writing BIFF records when the size is not known until well after
* the header has been written. The client code can call {@link #createDelayedOutput(int)}
* to reserve two bytes of the output for the 'ushort size' header field. The delayed output can
* be written at any stage.
*
* @author Josh Micich
*/
public interface DelayableLittleEndianOutput extends LittleEndianOutput {
/**
 * Creates an output stream intended for outputting a sequence of <tt>size</tt> bytes.
 * The section of this output covered by the returned stream is reserved, so the
 * caller can carry on writing to this output and fill in the reserved section later.
 *
 * @param size the number of bytes to reserve (for example 2 for a 'ushort size' field)
 * @return an output through which the reserved bytes can be written at any stage
 */
LittleEndianOutput createDelayedOutput(int size);
}

View File

@ -24,7 +24,7 @@ package org.apache.poi.util;
* *
* @author Josh Micich * @author Josh Micich
*/ */
public final class LittleEndianByteArrayOutputStream implements LittleEndianOutput { public final class LittleEndianByteArrayOutputStream implements LittleEndianOutput, DelayableLittleEndianOutput {
private final byte[] _buf; private final byte[] _buf;
private final int _endIndex; private final int _endIndex;
private int _writeIndex; private int _writeIndex;
@ -89,4 +89,10 @@ public final class LittleEndianByteArrayOutputStream implements LittleEndianOutp
public int getWriteIndex() { public int getWriteIndex() {
return _writeIndex; return _writeIndex;
} }
/**
 * Reserves the next <tt>size</tt> bytes of the underlying buffer and returns an
 * output which writes into just that reserved section. The write position of this
 * stream is moved past the reserved bytes.
 *
 * @param size the number of bytes to reserve
 * @return an output limited to the <tt>size</tt> reserved bytes
 */
public LittleEndianOutput createDelayedOutput(int size) {
    checkPosition(size);
    // The third constructor argument is the maximum write length (not an end index),
    // so pass 'size' to limit the delayed output to exactly the reserved bytes.
    LittleEndianOutput result = new LittleEndianByteArrayOutputStream(_buf, _writeIndex, size);
    _writeIndex += size;
    return result;
}
} }

View File

@ -48,7 +48,6 @@ public final class TestRecordFactory extends TestCase {
byte[] data = { byte[] data = {
0, 6, 5, 0, -2, 28, -51, 7, -55, 64, 0, 0, 6, 1, 0, 0 0, 6, 5, 0, -2, 28, -51, 7, -55, 64, 0, 0, 6, 1, 0, 0
}; };
short size = 16;
Record[] record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data)); Record[] record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data));
assertEquals(BOFRecord.class.getName(), assertEquals(BOFRecord.class.getName(),
@ -64,7 +63,6 @@ public final class TestRecordFactory extends TestCase {
assertEquals(5, bofRecord.getType()); assertEquals(5, bofRecord.getType());
assertEquals(1536, bofRecord.getVersion()); assertEquals(1536, bofRecord.getVersion());
recType = MMSRecord.sid; recType = MMSRecord.sid;
size = 2;
data = new byte[] data = new byte[]
{ {
0, 0 0, 0
@ -93,7 +91,6 @@ public final class TestRecordFactory extends TestCase {
byte[] data = { byte[] data = {
0, 0, 0, 0, 21, 0, 0, 0, 0, 0 0, 0, 0, 0, 21, 0, 0, 0, 0, 0
}; };
short size = 10;
Record[] record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data)); Record[] record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data));
assertEquals(NumberRecord.class.getName(), assertEquals(NumberRecord.class.getName(),
@ -154,34 +151,34 @@ public final class TestRecordFactory extends TestCase {
*/ */
public void testMixedContinue() throws Exception { public void testMixedContinue() throws Exception {
/** /**
* Taken from a real test sample file 39512.xls. See Bug 39512 for details. * Adapted from a real test sample file 39512.xls (Offset 0x4854).
* See Bug 39512 for details.
*/ */
String dump = String dump =
//OBJ //OBJ
"5D, 00, 48, 00, 15, 00, 12, 00, 0C, 00, 3C, 00, 11, 00, A0, 2E, 03, 01, CC, 42, " + "5D 00 48 00 15 00 12 00 0C 00 3C 00 11 00 A0 2E 03 01 CC 42 " +
"CF, 00, 00, 00, 00, 00, 0A, 00, 0C, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, " + "CF 00 00 00 00 00 0A 00 0C 00 00 00 00 00 00 00 00 00 00 00 " +
"03, 00, 0B, 00, 06, 00, 28, 01, 03, 01, 00, 00, 12, 00, 08, 00, 00, 00, 00, 00, " + "03 00 0B 00 06 00 28 01 03 01 00 00 12 00 08 00 00 00 00 00 " +
"00, 00, 03, 00, 11, 00, 04, 00, 3D, 00, 00, 00, 00, 00, 00, 00, " + "00 00 03 00 11 00 04 00 3D 00 00 00 00 00 00 00 " +
//MSODRAWING //MSODRAWING
"EC, 00, 08, 00, 00, 00, 0D, F0, 00, 00, 00, 00, " + "EC 00 08 00 00 00 0D F0 00 00 00 00 " +
//TXO //TXO (and 2 trailing CONTINUE records)
"B6, 01, 12, 00, 22, 02, 00, 00, 00, 00, 00, 00, 00, 00, 10, 00, 10, 00, 00, 00, " + "B6 01 12 00 22 02 00 00 00 00 00 00 00 00 10 00 10 00 00 00 00 00 " +
"00, 00, 3C, 00, 21, 00, 01, 4F, 00, 70, 00, 74, 00, 69, 00, 6F, 00, 6E, 00, 20, " + "3C 00 11 00 00 4F 70 74 69 6F 6E 20 42 75 74 74 6F 6E 20 33 39 " +
"00, 42, 00, 75, 00, 74, 00, 74, 00, 6F, 00, 6E, 00, 20, 00, 33, 00, 39, 00, 3C, " + "3C 00 10 00 00 00 05 00 00 00 00 00 10 00 00 00 00 00 00 00 " +
"00, 10, 00, 00, 00, 05, 00, 00, 00, 00, 00, 10, 00, 00, 00, 00, 00, 00, 00, " + // another CONTINUE
//CONTINUE "3C 00 7E 00 0F 00 04 F0 7E 00 00 00 92 0C 0A F0 08 00 00 00 " +
"3C, 00, 7E, 00, 0F, 00, 04, F0, 7E, 00, 00, 00, 92, 0C, 0A, F0, 08, 00, 00, 00, " + "3D 04 00 00 00 0A 00 00 A3 00 0B F0 3C 00 00 00 7F 00 00 01 " +
"3D, 04, 00, 00, 00, 0A, 00, 00, A3, 00, 0B, F0, 3C, 00, 00, 00, 7F, 00, 00, 01, " + "00 01 80 00 8C 01 03 01 85 00 01 00 00 00 8B 00 02 00 00 00 " +
"00, 01, 80, 00, 8C, 01, 03, 01, 85, 00, 01, 00, 00, 00, 8B, 00, 02, 00, 00, 00, " + "BF 00 08 00 1A 00 7F 01 29 00 29 00 81 01 41 00 00 08 BF 01 " +
"BF, 00, 08, 00, 1A, 00, 7F, 01, 29, 00, 29, 00, 81, 01, 41, 00, 00, 08, BF, 01, " + "00 00 10 00 C0 01 40 00 00 08 FF 01 00 00 08 00 00 00 10 F0 " +
"00, 00, 10, 00, C0, 01, 40, 00, 00, 08, FF, 01, 00, 00, 08, 00, 00, 00, 10, F0, " + "12 00 00 00 02 00 02 00 A0 03 18 00 B5 00 04 00 30 02 1A 00 " +
"12, 00, 00, 00, 02, 00, 02, 00, A0, 03, 18, 00, B5, 00, 04, 00, 30, 02, 1A, 00, " + "00 00 00 00 11 F0 00 00 00 00 " +
"00, 00, 00, 00, 11, F0, 00, 00, 00, 00, " +
//OBJ //OBJ
"5D, 00, 48, 00, 15, 00, 12, 00, 0C, 00, 3D, 00, 11, 00, 8C, 01, 03, 01, C8, 59, CF, 00, 00, " + "5D 00 48 00 15 00 12 00 0C 00 3D 00 11 00 8C 01 03 01 C8 59 CF 00 00 " +
"00, 00, 00, 0A, 00, 0C, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 03, 00, 0B, 00, 06, 00, " + "00 00 00 0A 00 0C 00 00 00 00 00 00 00 00 00 00 00 03 00 0B 00 06 00 " +
"7C, 16, 03, 01, 00, 00, 12, 00, 08, 00, 00, 00, 00, 00, 00, 00, 03, 00, 11, 00, 04, 00, 01, " + "7C 16 03 01 00 00 12 00 08 00 00 00 00 00 00 00 03 00 11 00 04 00 01 " +
"00, 00, 00, 00, 00, 00, 00"; "00 00 00 00 00 00 00";
byte[] data = HexRead.readFromString(dump); byte[] data = HexRead.readFromString(dump);
List records = RecordFactory.createRecords(new ByteArrayInputStream(data)); List records = RecordFactory.createRecords(new ByteArrayInputStream(data));

View File

@ -19,6 +19,7 @@ package org.apache.poi.hssf.record;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.IntMapper; import org.apache.poi.util.IntMapper;
/** /**
@ -35,8 +36,10 @@ public final class TestSSTRecordSizeCalculator extends TestCase {
private void confirmSize(int expectedSize) { private void confirmSize(int expectedSize) {
SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(strings); ContinuableRecordOutput cro = ContinuableRecordOutput.createForCountingOnly();
assertEquals(expectedSize, calculator.getRecordSize()); SSTSerializer ss = new SSTSerializer(strings, 0, 0);
ss.serialize(cro);
assertEquals(expectedSize, cro.getTotalSize());
} }
public void testBasic() { public void testBasic() {

View File

@ -18,6 +18,12 @@
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import org.apache.poi.util.HexRead;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianByteArrayInputStream;
import org.apache.poi.util.LittleEndianInput;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase; import junit.framework.TestCase;
/** /**
@ -28,29 +34,66 @@ import junit.framework.TestCase;
* @author Glen Stampoultzis (glens at apache.org) * @author Glen Stampoultzis (glens at apache.org)
*/ */
public final class TestStringRecord extends TestCase { public final class TestStringRecord extends TestCase {
byte[] data = new byte[] { private static final byte[] data = HexRead.readFromString(
(byte)0x0B,(byte)0x00, // length "0B 00 " + // length
(byte)0x00, // option "00 " + // option
// string // string
(byte)0x46,(byte)0x61,(byte)0x68,(byte)0x72,(byte)0x7A,(byte)0x65,(byte)0x75,(byte)0x67,(byte)0x74,(byte)0x79,(byte)0x70 "46 61 68 72 7A 65 75 67 74 79 70"
}; );
public void testLoad() { public void testLoad() {
StringRecord record = new StringRecord(TestcaseRecordInputStream.create(0x207, data)); StringRecord record = new StringRecord(TestcaseRecordInputStream.create(0x207, data));
assertEquals( "Fahrzeugtyp", record.getString()); assertEquals( "Fahrzeugtyp", record.getString());
assertEquals( 18, record.getRecordSize() ); assertEquals( 18, record.getRecordSize() );
} }
public void testStore() public void testStore() {
{ StringRecord record = new StringRecord();
StringRecord record = new StringRecord(); record.setString("Fahrzeugtyp");
record.setString("Fahrzeugtyp");
byte [] recordBytes = record.serialize(); byte [] recordBytes = record.serialize();
assertEquals(recordBytes.length - 4, data.length); assertEquals(recordBytes.length - 4, data.length);
for (int i = 0; i < data.length; i++) for (int i = 0; i < data.length; i++)
assertEquals("At offset " + i, data[i], recordBytes[i+4]); assertEquals("At offset " + i, data[i], recordBytes[i+4]);
} }
/**
 * Serializes a {@link StringRecord} whose text does not fit in a single BIFF record
 * and checks that the overflow is written to a trailing {@link ContinueRecord}, and
 * that the serialized bytes read back to the same string.
 */
public void testContinue() {
    final int maxBiffData = RecordInputStream.MAX_RECORD_DATA_SIZE;
    final int textLen = maxBiffData + 1000; // deliberately over-size
    final String textChunk = "ABCDEGGHIJKLMNOP"; // 16 chars

    // build a string of exactly textLen chars by repeating the chunk and trimming
    StringBuffer textBuf = new StringBuffer(16384);
    for (int n = 0; n < textLen; n += textChunk.length()) {
        textBuf.append(textChunk);
    }
    textBuf.setLength(textLen);
    String text = textBuf.toString();

    StringRecord original = new StringRecord();
    original.setString(text);
    byte[] ser = original.serialize();

    assertEquals(StringRecord.sid, LittleEndian.getUShort(ser, 0));
    int firstRecDataSize = LittleEndian.getUShort(ser, 2);
    if (firstRecDataSize > maxBiffData) {
        throw new AssertionFailedError(
                "StringRecord should have been split with a continue record");
    }
    // Confirm expected size of first record, and ushort strLen.
    assertEquals(maxBiffData, firstRecDataSize);
    assertEquals(textLen, LittleEndian.getUShort(ser, 4));

    // Confirm first few bytes of ContinueRecord
    int continueOffset = maxBiffData + 4;
    LittleEndianInput crIn = new LittleEndianByteArrayInputStream(ser, continueOffset);
    int charsInFirstRec = maxBiffData - (2 + 1); // strLen, optionFlags
    int charsInSecondRec = textLen - charsInFirstRec;
    assertEquals(ContinueRecord.sid, crIn.readUShort());
    assertEquals(1 + charsInSecondRec, crIn.readUShort());
    assertEquals(0, crIn.readUByte());
    assertEquals('N', crIn.readUByte());
    assertEquals('O', crIn.readUByte());

    // re-read and make sure string value is the same
    RecordInputStream in = TestcaseRecordInputStream.create(ser);
    StringRecord reread = new StringRecord(in);
    assertEquals(text, reread.getString());
}
} }

View File

@ -44,9 +44,9 @@ public final class TestTextObjectBaseRecord extends TestCase {
"00 00" + "00 00" +
"00 00 " + "00 00 " +
"3C 00 " + // ContinueRecord.sid "3C 00 " + // ContinueRecord.sid
"05 00 " + // size 5 "03 00 " + // size 3
"01 " + // unicode uncompressed "00 " + // unicode compressed
"41 00 42 00 " + // 'AB' "41 42 " + // 'AB'
"3C 00 " + // ContinueRecord.sid "3C 00 " + // ContinueRecord.sid
"10 00 " + // size 16 "10 00 " + // size 16
"00 00 18 00 00 00 00 00 " + "00 00 18 00 00 00 00 00 " +
@ -63,7 +63,7 @@ public final class TestTextObjectBaseRecord extends TestCase {
assertEquals(true, record.isTextLocked()); assertEquals(true, record.isTextLocked());
assertEquals(TextObjectRecord.TEXT_ORIENTATION_ROT_RIGHT, record.getTextOrientation()); assertEquals(TextObjectRecord.TEXT_ORIENTATION_ROT_RIGHT, record.getTextOrientation());
assertEquals(51, record.getRecordSize() ); assertEquals(49, record.getRecordSize() );
} }
public void testStore() public void testStore()

View File

@ -37,16 +37,14 @@ import org.apache.poi.util.LittleEndian;
public final class TestTextObjectRecord extends TestCase { public final class TestTextObjectRecord extends TestCase {
private static final byte[] simpleData = HexRead.readFromString( private static final byte[] simpleData = HexRead.readFromString(
"B6 01 12 00 " + "B6 01 12 00 " +
"12 02 00 00 00 00 00 00" + "12 02 00 00 00 00 00 00" +
"00 00 0D 00 08 00 00 00" + "00 00 0D 00 08 00 00 00" +
"00 00 " + "00 00 " +
"3C 00 1B 00 " + "3C 00 0E 00 " +
"01 48 00 65 00 6C 00 6C 00 6F 00 " + "00 48 65 6C 6C 6F 2C 20 57 6F 72 6C 64 21 " +
"2C 00 20 00 57 00 6F 00 72 00 6C " + "3C 00 08 " +
"00 64 00 21 00 " + "00 0D 00 00 00 00 00 00 00"
"3C 00 08 " +
"00 0D 00 00 00 00 00 00 00"
); );
@ -92,12 +90,12 @@ public final class TestTextObjectRecord extends TestCase {
record.setStr(str); record.setStr(str);
byte [] ser = record.serialize(); byte [] ser = record.serialize();
int formatDataLen = LittleEndian.getUShort(ser, 16); int formatDataLen = LittleEndian.getUShort(ser, 16);
assertEquals("formatDataLength", 0, formatDataLen); assertEquals("formatDataLength", 0, formatDataLen);
assertEquals(22, ser.length); // just the TXO record assertEquals(22, ser.length); // just the TXO record
//read again //read again
RecordInputStream is = TestcaseRecordInputStream.create(ser); RecordInputStream is = TestcaseRecordInputStream.create(ser);
record = new TextObjectRecord(is); record = new TextObjectRecord(is);
@ -152,38 +150,38 @@ public final class TestTextObjectRecord extends TestCase {
byte[] cln = cloned.serialize(); byte[] cln = cloned.serialize();
assertTrue(Arrays.equals(src, cln)); assertTrue(Arrays.equals(src, cln));
} }
/** similar to {@link #simpleData} but with link formula at end of TXO rec*/ /** similar to {@link #simpleData} but with link formula at end of TXO rec*/
private static final byte[] linkData = HexRead.readFromString( private static final byte[] linkData = HexRead.readFromString(
"B6 01 " + // TextObjectRecord.sid "B6 01 " + // TextObjectRecord.sid
"1E 00 " + // size 18 "1E 00 " + // size 18
"44 02 02 00 00 00 00 00" + "44 02 02 00 00 00 00 00" +
"00 00 " + "00 00 " +
"02 00 " + // strLen 2 "02 00 " + // strLen 2
"10 00 " + // 16 bytes for 2 format runs "10 00 " + // 16 bytes for 2 format runs
"00 00 00 00 " + "00 00 00 00 " +
"05 00 " + // formula size "05 00 " + // formula size
"D4 F0 8A 03 " + // unknownInt "D4 F0 8A 03 " + // unknownInt
"24 01 00 13 C0 " + //tRef(T2) "24 01 00 13 C0 " + //tRef(T2)
"13 " + // ?? "13 " + // ??
"3C 00 " + // ContinueRecord.sid "3C 00 " + // ContinueRecord.sid
"05 00 " + // size 5 "03 00 " + // size 3
"01 " + // unicode uncompressed "00 " + // unicode compressed
"41 00 42 00 " + // 'AB' "41 42 " + // 'AB'
"3C 00 " + // ContinueRecord.sid "3C 00 " + // ContinueRecord.sid
"10 00 " + // size 16 "10 00 " + // size 16
"00 00 18 00 00 00 00 00 " + "00 00 18 00 00 00 00 00 " +
"02 00 00 00 00 00 00 00 " "02 00 00 00 00 00 00 00 "
); );
public void testLinkFormula() { public void testLinkFormula() {
RecordInputStream is = new RecordInputStream(new ByteArrayInputStream(linkData)); RecordInputStream is = new RecordInputStream(new ByteArrayInputStream(linkData));
is.nextRecord(); is.nextRecord();
TextObjectRecord rec = new TextObjectRecord(is); TextObjectRecord rec = new TextObjectRecord(is);
Ptg ptg = rec.getLinkRefPtg(); Ptg ptg = rec.getLinkRefPtg();
assertNotNull(ptg); assertNotNull(ptg);
assertEquals(RefPtg.class, ptg.getClass()); assertEquals(RefPtg.class, ptg.getClass());
@ -193,6 +191,6 @@ public final class TestTextObjectRecord extends TestCase {
byte [] data2 = rec.serialize(); byte [] data2 = rec.serialize();
assertEquals(linkData.length, data2.length); assertEquals(linkData.length, data2.length);
assertTrue(Arrays.equals(linkData, data2)); assertTrue(Arrays.equals(linkData, data2));
} }
} }

View File

@ -19,8 +19,11 @@ package org.apache.poi.hssf.record;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
/** /**
* Tests that records size calculates correctly. * Tests that {@link UnicodeString} record size calculates correctly. The record size
* is used when serializing {@link SSTRecord}s.
* *
* @author Jason Height (jheight at apache.org) * @author Jason Height (jheight at apache.org)
*/ */
@ -33,11 +36,23 @@ public final class TestUnicodeString extends TestCase {
private static void confirmSize(int expectedSize, UnicodeString s) { private static void confirmSize(int expectedSize, UnicodeString s) {
confirmSize(expectedSize, s, 0); confirmSize(expectedSize, s, 0);
} }
/**
* Note - a value of zero for <tt>amountUsedInCurrentRecord</tt> would only ever occur just
* after a {@link ContinueRecord} had been started. In the initial {@link SSTRecord} this
* value starts at 8 (for the first {@link UnicodeString} written). In general, it can be
* any value between 0 and {@link #MAX_DATA_SIZE}
*/
private static void confirmSize(int expectedSize, UnicodeString s, int amountUsedInCurrentRecord) { private static void confirmSize(int expectedSize, UnicodeString s, int amountUsedInCurrentRecord) {
UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats(); ContinuableRecordOutput out = ContinuableRecordOutput.createForCountingOnly();
stats.remainingSize = MAX_DATA_SIZE-amountUsedInCurrentRecord; out.writeContinue();
s.getRecordSize(stats); for(int i=amountUsedInCurrentRecord; i>0; i--) {
assertEquals(expectedSize, stats.recordSize); out.writeByte(0);
}
int size0 = out.getTotalSize();
s.serialize(out);
int size1 = out.getTotalSize();
int actualSize = size1-size0;
assertEquals(expectedSize, actualSize);
} }
public void testSmallStringSize() { public void testSmallStringSize() {