Introduced ContinuableRecord to help fix serialization of StringRecords with large data. Fixed TextObjectRecord to only write 16bit unicode when needed. Simplification in UnicodeString.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@711749 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-11-06 01:12:41 +00:00
parent 603bd64f73
commit bcd1a9a1c5
19 changed files with 827 additions and 959 deletions

View File

@ -379,8 +379,6 @@ public final class RecordFactory {
records.add(record);
} else if (lastRecord instanceof DrawingGroupRecord) {
((DrawingGroupRecord)lastRecord).processContinueRecord(contRec.getData());
} else if (lastRecord instanceof StringRecord) {
((StringRecord)lastRecord).processContinueRecord(contRec.getData());
} else if (lastRecord instanceof UnknownRecord) {
//Gracefully handle records that we don't know about,
//that happen to be continued

View File

@ -17,14 +17,16 @@
package org.apache.poi.hssf.record;
import java.util.Iterator;
import org.apache.poi.hssf.record.cont.ContinuableRecord;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.IntMapper;
import org.apache.poi.util.LittleEndianConsts;
import java.util.Iterator;
/**
* Title: Static String Table Record
* <P>
* Title: Static String Table Record (0x00FC)<p/>
*
* Description: This holds all the strings for LabelSSTRecords.
* <P>
* REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
@ -37,27 +39,20 @@ import java.util.Iterator;
* @see org.apache.poi.hssf.record.LabelSSTRecord
* @see org.apache.poi.hssf.record.ContinueRecord
*/
public final class SSTRecord extends Record {
public final class SSTRecord extends ContinuableRecord {
public static final short sid = 0x00FC;
private static UnicodeString EMPTY_STRING = new UnicodeString("");
/** how big can an SST record be? As big as any record can be: 8228 bytes */
static final int MAX_RECORD_SIZE = 8228;
private static final UnicodeString EMPTY_STRING = new UnicodeString("");
// TODO - move these constants to test class (the only consumer)
/** standard record overhead: two shorts (record id plus data space size)*/
static final int STD_RECORD_OVERHEAD =
2 * LittleEndianConsts.SHORT_SIZE;
static final int STD_RECORD_OVERHEAD = 2 * LittleEndianConsts.SHORT_SIZE;
/** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
static final int SST_RECORD_OVERHEAD =
( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
static final int SST_RECORD_OVERHEAD = STD_RECORD_OVERHEAD + 2 * LittleEndianConsts.INT_SIZE;
/** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;
/** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
static final int MAX_DATA_SPACE = RecordInputStream.MAX_RECORD_DATA_SIZE - 8;
/** union of strings in the SST and EXTSST */
private int field_1_num_strings;
@ -133,37 +128,6 @@ public final class SSTRecord extends Record {
return field_2_num_unique_strings;
}
/**
* USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
* METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
* ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
* WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
* WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
*
* @param count number of strings
*
*/
public void setNumStrings( final int count )
{
field_1_num_strings = count;
}
/**
* USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
* METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
* EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
* IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
* THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
* THE RECORD
*
* @param count number of strings
*/
public void setNumUniqueStrings( final int count )
{
field_2_num_unique_strings = count;
}
/**
* Get a particular string by its index
@ -178,11 +142,6 @@ public final class SSTRecord extends Record {
return (UnicodeString) field_3_strings.get( id );
}
public boolean isString16bit( final int id )
{
UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( id ) );
return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
}
/**
* Return a debugging string representation
@ -350,29 +309,11 @@ public final class SSTRecord extends Record {
return field_3_strings.size();
}
/**
* called by the class that is responsible for writing this sucker.
* Subclasses should implement this so that their data is passed back in a
* byte array.
*
* @return size
*/
public int serialize( int offset, byte[] data )
{
SSTSerializer serializer = new SSTSerializer(
field_3_strings, getNumStrings(), getNumUniqueStrings() );
int bytes = serializer.serialize( offset, data );
protected void serialize(ContinuableRecordOutput out) {
SSTSerializer serializer = new SSTSerializer(field_3_strings, getNumStrings(), getNumUniqueStrings() );
serializer.serialize(out);
bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
return bytes;
}
protected int getDataSize() {
SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(field_3_strings);
int recordSize = calculator.getRecordSize();
return recordSize-4;
}
SSTDeserializer getDeserializer()

View File

@ -1,76 +0,0 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
/**
* Write out an SST header record.
*
* @author Glen Stampoultzis (glens at apache.org)
*/
class SSTRecordHeader
{
int numStrings;
int numUniqueStrings;
public SSTRecordHeader( int numStrings, int numUniqueStrings )
{
this.numStrings = numStrings;
this.numUniqueStrings = numUniqueStrings;
}
/**
* Writes out the SST record. This consists of the sid, the record size, the number of
* strings and the number of unique strings.
*
* @param data The data buffer to write the header to.
* @param bufferIndex The index into the data buffer where the header should be written.
* @param recSize The number of records written.
*
* @return The bufer of bytes modified.
*/
public int writeSSTHeader( UnicodeString.UnicodeRecordStats stats, byte[] data, int bufferIndex, int recSize )
{
int offset = bufferIndex;
LittleEndian.putShort( data, offset, SSTRecord.sid );
offset += LittleEndianConsts.SHORT_SIZE;
stats.recordSize += LittleEndianConsts.SHORT_SIZE;
stats.remainingSize -= LittleEndianConsts.SHORT_SIZE;
//Delay writing the length
stats.lastLengthPos = offset;
offset += LittleEndianConsts.SHORT_SIZE;
stats.recordSize += LittleEndianConsts.SHORT_SIZE;
stats.remainingSize -= LittleEndianConsts.SHORT_SIZE;
LittleEndian.putInt( data, offset, numStrings );
offset += LittleEndianConsts.INT_SIZE;
stats.recordSize += LittleEndianConsts.INT_SIZE;
stats.remainingSize -= LittleEndianConsts.INT_SIZE;
LittleEndian.putInt( data, offset, numUniqueStrings );
offset += LittleEndianConsts.INT_SIZE;
stats.recordSize += LittleEndianConsts.INT_SIZE;
stats.remainingSize -= LittleEndianConsts.INT_SIZE;
return offset - bufferIndex;
}
}

View File

@ -1,51 +0,0 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import org.apache.poi.util.IntMapper;
/**
* Used to calculate the record sizes for a particular record. This kind of
* sucks because it's similar to the SST serialization code. In general
* the SST serialization code needs to be rewritten.
*
* @author Glen Stampoultzis (glens at apache.org)
* @author Jason Height (jheight at apache.org)
*/
class SSTRecordSizeCalculator
{
private IntMapper strings;
public SSTRecordSizeCalculator(IntMapper strings)
{
this.strings = strings;
}
public int getRecordSize() {
UnicodeString.UnicodeRecordStats rs = new UnicodeString.UnicodeRecordStats();
rs.remainingSize -= SSTRecord.SST_RECORD_OVERHEAD;
rs.recordSize += SSTRecord.SST_RECORD_OVERHEAD;
for (int i=0; i < strings.size(); i++ )
{
UnicodeString unistr = ( (UnicodeString) strings.get(i));
unistr.getRecordSize(rs);
}
return rs.recordSize;
}
}

View File

@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
@ -16,11 +15,10 @@
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.IntMapper;
import org.apache.poi.util.LittleEndian;
/**
* This class handles serialization of SST records. It utilizes the record processor
@ -28,70 +26,49 @@ import org.apache.poi.util.LittleEndian;
*
* @author Glen Stampoultzis (glens at apache.org)
*/
class SSTSerializer
{
final class SSTSerializer {
// todo: make private again
private IntMapper strings;
private final int _numStrings;
private final int _numUniqueStrings;
private SSTRecordHeader sstRecordHeader;
private final IntMapper strings;
/** Offsets from the beginning of the SST record (even across continuations) */
int[] bucketAbsoluteOffsets;
private final int[] bucketAbsoluteOffsets;
/** Offsets relative the start of the current SST or continue record */
int[] bucketRelativeOffsets;
private final int[] bucketRelativeOffsets;
int startOfSST, startOfRecord;
public SSTSerializer( IntMapper strings, int numStrings, int numUniqueStrings )
{
this.strings = strings;
this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
_numStrings = numStrings;
_numUniqueStrings = numUniqueStrings;
int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size());
this.bucketAbsoluteOffsets = new int[infoRecs];
this.bucketRelativeOffsets = new int[infoRecs];
}
/**
* Create a byte array consisting of an SST record and any
* required Continue records, ready to be written out.
* <p>
* If an SST record and any subsequent Continue records are read
* in to create this instance, this method should produce a byte
* array that is identical to the byte array produced by
* concatenating the input records' data.
*
* @return the byte array
*/
public int serialize(int offset, byte[] data )
{
UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats();
sstRecordHeader.writeSSTHeader( stats, data, 0 + offset, 0 );
int pos = offset + SSTRecord.SST_RECORD_OVERHEAD;
public void serialize(ContinuableRecordOutput out) {
out.writeInt(_numStrings);
out.writeInt(_numUniqueStrings);
for ( int k = 0; k < strings.size(); k++ )
{
if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
{
int rOff = out.getTotalSize();
int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE;
if (index < ExtSSTRecord.MAX_BUCKETS) {
//Excel only indexes the first 128 buckets.
bucketAbsoluteOffsets[index] = pos-offset;
bucketRelativeOffsets[index] = pos-offset;
bucketAbsoluteOffsets[index] = rOff;
bucketRelativeOffsets[index] = rOff;
}
}
UnicodeString s = getUnicodeString(k);
pos += s.serialize(stats, pos, data);
s.serialize(out);
}
//Check to see if there is a hanging continue record length
if (stats.lastLengthPos != -1) {
short lastRecordLength = (short)(pos - stats.lastLengthPos-2);
if (lastRecordLength > 8224)
throw new InternalError();
LittleEndian.putShort(data, stats.lastLengthPos, lastRecordLength);
}
return pos - offset;
}

View File

@ -17,19 +17,23 @@
package org.apache.poi.hssf.record;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hssf.record.cont.ContinuableRecord;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.StringUtil;
/**
* Supports the STRING record structure. (0x0207)
* STRING (0x0207)<p/>
*
* Stores the cached result of a text formula
*
* @author Glen Stampoultzis (glens at apache.org)
*/
public class StringRecord extends Record {
public final class StringRecord extends ContinuableRecord {
public final static short sid = 0x0207;
private int field_1_string_length;
private byte field_2_unicode_flag;
private String field_3_string;
private boolean _is16bitUnicode;
private String _text;
public StringRecord()
@ -39,77 +43,24 @@ public class StringRecord extends Record {
/**
* @param in the RecordInputstream to read the record from
*/
public StringRecord( RecordInputStream in)
{
field_1_string_length = in.readShort();
field_2_unicode_flag = in.readByte();
byte[] data = in.readRemainder();
//Why isn't this using the in.readString methods???
if (isUnCompressedUnicode())
{
field_3_string = StringUtil.getFromUnicodeLE(data, 0, field_1_string_length );
}
else
{
field_3_string = StringUtil.getFromCompressedUnicode(data, 0, field_1_string_length);
}
}
public StringRecord( RecordInputStream in) {
int field_1_string_length = in.readUShort();
_is16bitUnicode = in.readByte() != 0x00;
public void processContinueRecord(byte[] data) {
if(isUnCompressedUnicode()) {
field_3_string += StringUtil.getFromUnicodeLE(data, 0, field_1_string_length - field_3_string.length());
if (_is16bitUnicode){
_text = in.readUnicodeLEString(field_1_string_length);
} else {
field_3_string += StringUtil.getFromCompressedUnicode(data, 0, field_1_string_length - field_3_string.length());
_text = in.readCompressedUnicode(field_1_string_length);
}
}
private int getStringByteLength()
{
return isUnCompressedUnicode() ? field_1_string_length * 2 : field_1_string_length;
protected void serialize(ContinuableRecordOutput out) {
out.writeShort(_text.length());
out.writeStringData(_text);
}
protected int getDataSize() {
return 2 + 1 + getStringByteLength();
}
/**
* is this uncompressed unicode (16bit)? Or just 8-bit compressed?
* @return isUnicode - True for 16bit- false for 8bit
*/
public boolean isUnCompressedUnicode()
{
return (field_2_unicode_flag == 1);
}
/**
* called by the class that is responsible for writing this sucker.
* Subclasses should implement this so that their data is passed back in a
* byte array.
*
* @param offset to begin writing at
* @param data byte array containing instance data
* @return number of bytes written
*/
public int serialize( int offset, byte[] data )
{
LittleEndian.putUShort(data, 0 + offset, sid);
LittleEndian.putUShort(data, 2 + offset, 3 + getStringByteLength());
LittleEndian.putUShort(data, 4 + offset, field_1_string_length);
data[6 + offset] = field_2_unicode_flag;
if (isUnCompressedUnicode())
{
StringUtil.putUnicodeLE(field_3_string, data, 7 + offset);
}
else
{
StringUtil.putCompressedUnicode(field_3_string, data, 7 + offset);
}
return getRecordSize();
}
/**
* return the non static version of the id for this record.
*/
public short getSid()
{
return sid;
@ -120,26 +71,16 @@ public class StringRecord extends Record {
*/
public String getString()
{
return field_3_string;
return _text;
}
/**
* Sets whether the string is compressed or not
* @param unicode_flag 1 = uncompressed, 0 = compressed
*/
public void setCompressedFlag( byte unicode_flag )
{
this.field_2_unicode_flag = unicode_flag;
}
/**
* Sets the string represented by this record.
*/
public void setString( String string )
{
this.field_1_string_length = string.length();
this.field_3_string = string;
setCompressedFlag(StringUtil.hasMultibyte(string) ? (byte)1 : (byte)0);
public void setString(String string) {
_text = string;
_is16bitUnicode = StringUtil.hasMultibyte(string);
}
public String toString()
@ -148,16 +89,15 @@ public class StringRecord extends Record {
buffer.append("[STRING]\n");
buffer.append(" .string = ")
.append(field_3_string).append("\n");
.append(_text).append("\n");
buffer.append("[/STRING]\n");
return buffer.toString();
}
public Object clone() {
StringRecord rec = new StringRecord();
rec.field_1_string_length = this.field_1_string_length;
rec.field_2_unicode_flag= this.field_2_unicode_flag;
rec.field_3_string = this.field_3_string;
rec._is16bitUnicode= _is16bitUnicode;
rec._text = _text;
return rec;
}
}

View File

@ -17,16 +17,13 @@
package org.apache.poi.hssf.record;
import java.io.UnsupportedEncodingException;
import org.apache.poi.hssf.record.cont.ContinuableRecord;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianOutput;
/**
* The TXO record (0x01B6) is used to define the properties of a text box. It is
@ -36,7 +33,7 @@ import org.apache.poi.util.LittleEndianOutput;
*
* @author Glen Stampoultzis (glens at apache.org)
*/
public final class TextObjectRecord extends Record {
public final class TextObjectRecord extends ContinuableRecord {
public final static short sid = 0x01B6;
private static final int FORMAT_RUN_ENCODED_SIZE = 8; // 2 shorts and 4 bytes reserved
@ -163,30 +160,7 @@ public final class TextObjectRecord extends Record {
return sid;
}
/**
* Only for the current record. does not include any subsequent Continue
* records
*/
private int getCurrentRecordDataSize() {
int result = 2 + 2 + 2 + 2 + 2 + 2 + 2 + 4;
if (_linkRefPtg != null) {
result += 2 // formula size
+ 4 // unknownInt
+_linkRefPtg.getSize();
if (_unknownPostFormulaByte != null) {
result += 1;
}
}
return result;
}
private int serializeTXORecord(int offset, byte[] data) {
int dataSize = getCurrentRecordDataSize();
int recSize = dataSize+4;
LittleEndianOutput out = new LittleEndianByteArrayOutputStream(data, offset, recSize);
out.writeShort(TextObjectRecord.sid);
out.writeShort(dataSize);
private void serializeTXORecord(ContinuableRecordOutput out) {
out.writeShort(field_1_options);
out.writeShort(field_2_textOrientation);
@ -206,79 +180,23 @@ public final class TextObjectRecord extends Record {
out.writeByte(_unknownPostFormulaByte.byteValue());
}
}
return recSize;
}
private int serializeTrailingRecords(int offset, byte[] data) {
byte[] textBytes;
try {
textBytes = _text.getString().getBytes("UTF-16LE");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e.getMessage(), e);
}
int remainingLength = textBytes.length;
int countTextBytesWritten = 0;
int pos = offset;
// (regardless what was read, we always serialize double-byte
// unicode characters (UTF-16LE).
Byte unicodeFlag = new Byte((byte)1);
while (remainingLength > 0) {
int chunkSize = Math.min(RecordInputStream.MAX_RECORD_DATA_SIZE - 2, remainingLength);
remainingLength -= chunkSize;
pos += ContinueRecord.write(data, pos, unicodeFlag, textBytes, countTextBytesWritten, chunkSize);
countTextBytesWritten += chunkSize;
private void serializeTrailingRecords(ContinuableRecordOutput out) {
out.writeContinue();
out.writeStringData(_text.getString());
out.writeContinue();
writeFormatData(out, _text);
}
byte[] formatData = createFormatData(_text);
pos += ContinueRecord.write(data, pos, null, formatData);
return pos - offset;
}
private int getTrailingRecordsSize() {
if (_text.length() < 1) {
return 0;
}
int encodedTextSize = 0;
int textBytesLength = _text.length() * LittleEndian.SHORT_SIZE;
while (textBytesLength > 0) {
int chunkSize = Math.min(RecordInputStream.MAX_RECORD_DATA_SIZE - 2, textBytesLength);
textBytesLength -= chunkSize;
encodedTextSize += 4; // +4 for ContinueRecord sid+size
encodedTextSize += 1+chunkSize; // +1 for compressed unicode flag,
}
int encodedFormatSize = (_text.numFormattingRuns() + 1) * FORMAT_RUN_ENCODED_SIZE
+ 4; // +4 for ContinueRecord sid+size
return encodedTextSize + encodedFormatSize;
}
public int serialize(int offset, byte[] data) {
int expectedTotalSize = getRecordSize();
int totalSize = serializeTXORecord(offset, data);
protected void serialize(ContinuableRecordOutput out) {
serializeTXORecord(out);
if (_text.getString().length() > 0) {
totalSize += serializeTrailingRecords(offset+totalSize, data);
serializeTrailingRecords(out);
}
if (totalSize != expectedTotalSize)
throw new RecordFormatException(totalSize
+ " bytes written but getRecordSize() reports " + expectedTotalSize);
return totalSize;
}
/**
* Note - this total size includes all potential {@link ContinueRecord}s written
* but it is not the "ushort size" value to be written at the start of the first BIFF record
*/
protected int getDataSize() {
return getCurrentRecordDataSize() + getTrailingRecordsSize();
}
private int getFormattingDataLength() {
if (_text.length() < 1) {
// important - no formatting data if text is empty
@ -287,25 +205,17 @@ public final class TextObjectRecord extends Record {
return (_text.numFormattingRuns() + 1) * FORMAT_RUN_ENCODED_SIZE;
}
private static byte[] createFormatData(HSSFRichTextString str) {
private static void writeFormatData(ContinuableRecordOutput out , HSSFRichTextString str) {
int nRuns = str.numFormattingRuns();
byte[] result = new byte[(nRuns + 1) * FORMAT_RUN_ENCODED_SIZE];
int pos = 0;
for (int i = 0; i < nRuns; i++) {
LittleEndian.putUShort(result, pos, str.getIndexOfFormattingRun(i));
pos += 2;
out.writeShort(str.getIndexOfFormattingRun(i));
int fontIndex = str.getFontOfFormattingRun(i);
LittleEndian.putUShort(result, pos, fontIndex == str.NO_FONT ? 0 : fontIndex);
pos += 2;
pos += 4; // skip reserved
out.writeShort(fontIndex == str.NO_FONT ? 0 : fontIndex);
out.writeInt(0); // skip reserved
}
LittleEndian.putUShort(result, pos, str.length());
pos += 2;
LittleEndian.putUShort(result, pos, 0);
pos += 2;
pos += 4; // skip reserved
return result;
out.writeShort(str.length());
out.writeShort(0);
out.writeInt(0); // skip reserved
}
/**

View File

@ -17,30 +17,31 @@
package org.apache.poi.hssf.record;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.HexDump;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianOutput;
/**
* Title: Unicode String<P>
* Description: Unicode String record. We implement these as a record, although
* they are really just standard fields that are in several records.
* It is considered more desirable then repeating it in all of them.<P>
* REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<P>
* Title: Unicode String<p/>
* Description: Unicode String - just standard fields that are in several records.
* It is considered more desirable then repeating it in all of them.<p/>
* REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<p/>
* @author Andrew C. Oliver
* @author Marc Johnson (mjohnson at apache dot org)
* @author Glen Stampoultzis (glens at apache.org)
*/
public final class UnicodeString implements Comparable {
private short field_1_charCount; // = 0;
private byte field_2_optionflags; // = 0;
private String field_3_string; // = null;
private short field_1_charCount;
private byte field_2_optionflags;
private String field_3_string;
private List field_4_format_runs;
private byte[] field_5_ext_rst;
private static final BitField highByte = BitFieldFactory.getInstance(0x1);
@ -56,6 +57,10 @@ public final class UnicodeString implements Comparable {
this.fontIndex = fontIndex;
}
public FormatRun(LittleEndianInput in) {
this(in.readShort(), in.readShort());
}
public short getCharacterPos() {
return character;
}
@ -65,13 +70,12 @@ public final class UnicodeString implements Comparable {
}
public boolean equals(Object o) {
if ((o == null) || (o.getClass() != this.getClass()))
{
if (!(o instanceof FormatRun)) {
return false;
}
FormatRun other = ( FormatRun ) o;
return ((character == other.character) && (fontIndex == other.fontIndex));
return character == other.character && fontIndex == other.fontIndex;
}
public int compareTo(Object obj) {
@ -86,6 +90,11 @@ public final class UnicodeString implements Comparable {
public String toString() {
return "character="+character+",fontIndex="+fontIndex;
}
public void serialize(LittleEndianOutput out) {
out.writeShort(character);
out.writeShort(fontIndex);
}
}
private UnicodeString() {
@ -116,13 +125,12 @@ public final class UnicodeString implements Comparable {
*/
public boolean equals(Object o)
{
if ((o == null) || (o.getClass() != this.getClass()))
{
if (!(o instanceof UnicodeString)) {
return false;
}
UnicodeString other = (UnicodeString) o;
//Ok lets do this in stages to return a quickly, first check the actual string
//OK lets do this in stages to return a quickly, first check the actual string
boolean eq = ((field_1_charCount == other.field_1_charCount)
&& (field_2_optionflags == other.field_2_optionflags)
&& field_3_string.equals(other.field_3_string));
@ -203,9 +211,7 @@ public final class UnicodeString implements Comparable {
if (isRichText() && (runCount > 0)) {
field_4_format_runs = new ArrayList(runCount);
for (int i=0;i<runCount;i++) {
field_4_format_runs.add(new FormatRun(in.readShort(), in.readShort()));
//read reserved
//in.readInt();
field_4_format_runs.add(new FormatRun(in));
}
}
@ -372,11 +378,8 @@ public final class UnicodeString implements Comparable {
field_2_optionflags = richText.clearByte(field_2_optionflags);
}
public byte[] getExtendedRst() {
return this.field_5_ext_rst;
}
public void setExtendedRst(byte[] ext_rst) {
void setExtendedRst(byte[] ext_rst) {
if (ext_rst != null)
field_2_optionflags = extBit.setByte(field_2_optionflags);
else field_2_optionflags = extBit.clearByte(field_2_optionflags);
@ -442,351 +445,43 @@ public final class UnicodeString implements Comparable {
return buffer.toString();
}
private int writeContinueIfRequired(UnicodeRecordStats stats, final int requiredSize, int offset, byte[] data) {
//Basic string overhead
if (stats.remainingSize < requiredSize) {
//Check if be are already in a continue record, if so make sure that
//we go back and write out our length
if (stats.lastLengthPos != -1) {
short lastRecordLength = (short)(offset - stats.lastLengthPos - 2);
if (lastRecordLength > 8224)
throw new InternalError();
LittleEndian.putShort(data, stats.lastLengthPos, lastRecordLength);
public void serialize(ContinuableRecordOutput out) {
int numberOfRichTextRuns = 0;
int extendedDataSize = 0;
if (isRichText() && field_4_format_runs != null) {
numberOfRichTextRuns = field_4_format_runs.size();
}
if (isExtendedText() && field_5_ext_rst != null) {
extendedDataSize = field_5_ext_rst.length;
}
LittleEndian.putShort(data, offset, ContinueRecord.sid);
offset+=2;
//Record the location of the last continue length position, but don't write
//anything there yet (since we don't know what it will be!)
stats.lastLengthPos = offset;
offset += 2;
out.writeString(field_3_string, numberOfRichTextRuns, extendedDataSize);
stats.recordSize += 4;
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
}
return offset;
}
public int serialize(UnicodeRecordStats stats, final int offset, byte [] data)
{
int pos = offset;
//Basic string overhead
pos = writeContinueIfRequired(stats, 3, pos, data);
LittleEndian.putShort(data, pos, getCharCount());
pos += 2;
data[ pos ] = getOptionFlags();
pos += 1;
stats.recordSize += 3;
stats.remainingSize-= 3;
if (isRichText()) {
if (field_4_format_runs != null) {
pos = writeContinueIfRequired(stats, 2, pos, data);
LittleEndian.putShort(data, pos, (short) field_4_format_runs.size());
pos += 2;
stats.recordSize += 2;
stats.remainingSize -= 2;
}
}
if ( isExtendedText() )
{
if (this.field_5_ext_rst != null) {
pos = writeContinueIfRequired(stats, 4, pos, data);
LittleEndian.putInt(data, pos, field_5_ext_rst.length);
pos += 4;
stats.recordSize += 4;
stats.remainingSize -= 4;
}
}
int charsize = isUncompressedUnicode() ? 2 : 1;
int strSize = (getString().length() * charsize);
byte[] strBytes = null;
try {
String unicodeString = getString();
if (!isUncompressedUnicode())
{
strBytes = unicodeString.getBytes("ISO-8859-1");
}
else
{
strBytes = unicodeString.getBytes("UTF-16LE");
}
}
catch (Exception e) {
throw new InternalError();
}
if (strSize != strBytes.length)
throw new InternalError("That shouldnt have happened!");
//Check to see if the offset occurs mid string, if so then we need to add
//the byte to start with that represents the first byte of the continue record.
if (strSize > stats.remainingSize) {
//OK the offset occurs half way through the string, that means that
//we need an extra byte after the continue record ie we didnt finish
//writing out the string the 1st time through
//But hang on, how many continue records did we span? What if this is
//a REALLY long string. We need to work this all out.
int amountThatCantFit = strSize;
int strPos = 0;
while (amountThatCantFit > 0) {
int amountWritten = Math.min(stats.remainingSize, amountThatCantFit);
//Make sure that the amount that can't fit takes into account
//whether we are writing double byte unicode
if (isUncompressedUnicode()) {
//We have the '-1' here because whether this is the first record or
//subsequent continue records, there is always the case that the
//number of bytes in a string on double byte boundaries is actually odd.
if ( ( (amountWritten ) % 2) == 1)
amountWritten--;
}
System.arraycopy(strBytes, strPos, data, pos, amountWritten);
pos += amountWritten;
strPos += amountWritten;
stats.recordSize += amountWritten;
stats.remainingSize -= amountWritten;
//Ok lets subtract what we can write
amountThatCantFit -= amountWritten;
//Each iteration of this while loop is another continue record, unless
//everything now fits.
if (amountThatCantFit > 0) {
//We know that a continue WILL be requied, but use this common method
pos = writeContinueIfRequired(stats, amountThatCantFit, pos, data);
//The first byte after a continue mid string is the extra byte to
//indicate if this run is compressed or not.
data[pos] = (byte) (isUncompressedUnicode() ? 0x1 : 0x0);
pos++;
stats.recordSize++;
stats.remainingSize --;
}
}
} else {
if (strSize > (data.length-pos))
System.out.println("Hmm shouldnt happen");
//Ok the string fits nicely in the remaining size
System.arraycopy(strBytes, 0, data, pos, strSize);
pos += strSize;
stats.recordSize += strSize;
stats.remainingSize -= strSize;
}
if (isRichText() && (field_4_format_runs != null)) {
int count = field_4_format_runs.size();
if (numberOfRichTextRuns > 0) {
//This will ensure that a run does not split a continue
for (int i=0;i<count;i++) {
pos = writeContinueIfRequired(stats, 4, pos, data);
for (int i=0;i<numberOfRichTextRuns;i++) {
if (out.getAvailableSpace() < 4) {
out.writeContinue();
}
FormatRun r = (FormatRun)field_4_format_runs.get(i);
LittleEndian.putShort(data, pos, r.character);
pos += 2;
LittleEndian.putShort(data, pos, r.fontIndex);
pos += 2;
//Each run count is four bytes
stats.recordSize += 4;
stats.remainingSize -=4;
r.serialize(out);
}
}
if (isExtendedText() && (field_5_ext_rst != null)) {
//Ok ExtRst is actually not documented, so i am going to hope
if (extendedDataSize > 0) {
// OK ExtRst is actually not documented, so i am going to hope
// that we can actually continue on byte boundaries
int ammountThatCantFit = field_5_ext_rst.length - stats.remainingSize;
int extPos = 0;
if (ammountThatCantFit > 0) {
while (ammountThatCantFit > 0) {
//So for this record we have already written
int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
System.arraycopy(field_5_ext_rst, extPos, data, pos, ammountWritten);
pos += ammountWritten;
extPos += ammountWritten;
stats.recordSize += ammountWritten;
stats.remainingSize -= ammountWritten;
//Ok lets subtract what we can write
ammountThatCantFit -= ammountWritten;
if (ammountThatCantFit > 0) {
pos = writeContinueIfRequired(stats, 1, pos, data);
while (true) {
int nBytesToWrite = Math.min(extendedDataSize - extPos, out.getAvailableSpace());
out.write(field_5_ext_rst, extPos, nBytesToWrite);
extPos += nBytesToWrite;
if (extPos >= extendedDataSize) {
break;
}
}
} else {
//We can fit wholey in what remains.
System.arraycopy(field_5_ext_rst, 0, data, pos, field_5_ext_rst.length);
pos += field_5_ext_rst.length;
stats.remainingSize -= field_5_ext_rst.length;
stats.recordSize += field_5_ext_rst.length;
}
}
return pos - offset;
}
public void setCompressedUnicode() {
field_2_optionflags = highByte.setByte(field_2_optionflags);
}
public void setUncompressedUnicode() {
field_2_optionflags = highByte.clearByte(field_2_optionflags);
}
private boolean isUncompressedUnicode()
{
return highByte.isSet(getOptionFlags());
}
/** Returns the size of this record, given the amount of record space
* remaining, it will also include the size of writing a continue record.
*/
public static class UnicodeRecordStats {
public int recordSize;
public int remainingSize = SSTRecord.MAX_RECORD_SIZE;
public int lastLengthPos = -1;
}
public void getRecordSize(UnicodeRecordStats stats) {
//Basic string overhead
if (stats.remainingSize < 3) {
//Needs a continue
stats.recordSize += 4;
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
}
stats.recordSize += 3;
stats.remainingSize-= 3;
//Read the number of rich runs if rich text.
if ( isRichText() )
{
//Run count
if (stats.remainingSize < 2) {
//Needs a continue
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize+=4;
}
stats.recordSize += 2;
stats.remainingSize -=2;
}
//Read the size of extended data if present.
if ( isExtendedText() )
{
//Needs a continue
//extension length
if (stats.remainingSize < 4) {
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize+=4;
}
stats.recordSize += 4;
stats.remainingSize -=4;
}
int charsize = isUncompressedUnicode() ? 2 : 1;
int strSize = (getString().length() * charsize);
//Check to see if the offset occurs mid string, if so then we need to add
//the byte to start with that represents the first byte of the continue record.
if (strSize > stats.remainingSize) {
//Ok the offset occurs half way through the string, that means that
//we need an extra byte after the continue record ie we didnt finish
//writing out the string the 1st time through
//But hang on, how many continue records did we span? What if this is
//a REALLY long string. We need to work this all out.
int ammountThatCantFit = strSize;
while (ammountThatCantFit > 0) {
int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
//Make sure that the ammount that cant fit takes into account
//whether we are writing double byte unicode
if (isUncompressedUnicode()) {
//We have the '-1' here because whether this is the first record or
//subsequent continue records, there is always the case that the
//number of bytes in a string on doube byte boundaries is actually odd.
if ( ( (ammountWritten) % 2) == 1)
ammountWritten--;
}
stats.recordSize += ammountWritten;
stats.remainingSize -= ammountWritten;
//Ok lets subtract what we can write
ammountThatCantFit -= ammountWritten;
//Each iteration of this while loop is another continue record, unless
//everything now fits.
if (ammountThatCantFit > 0) {
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize+=4;
//The first byte after a continue mid string is the extra byte to
//indicate if this run is compressed or not.
stats.recordSize++;
stats.remainingSize --;
}
}
} else {
//Ok the string fits nicely in the remaining size
stats.recordSize += strSize;
stats.remainingSize -= strSize;
}
if (isRichText() && (field_4_format_runs != null)) {
int count = field_4_format_runs.size();
//This will ensure that a run does not split a continue
for (int i=0;i<count;i++) {
if (stats.remainingSize < 4) {
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize+=4;
}
//Each run count is four bytes
stats.recordSize += 4;
stats.remainingSize -=4;
}
}
if (isExtendedText() && (field_5_ext_rst != null)) {
//Ok ExtRst is actually not documented, so i am going to hope
//that we can actually continue on byte boundaries
int ammountThatCantFit = field_5_ext_rst.length - stats.remainingSize;
if (ammountThatCantFit > 0) {
while (ammountThatCantFit > 0) {
//So for this record we have already written
int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
stats.recordSize += ammountWritten;
stats.remainingSize -= ammountWritten;
//Ok lets subtract what we can write
ammountThatCantFit -= ammountWritten;
if (ammountThatCantFit > 0) {
//Each iteration of this while loop is another continue record.
//Reset the available space.
stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
//continue record overhead
stats.recordSize += 4;
}
}
} else {
//We can fit wholey in what remains.
stats.remainingSize -= field_5_ext_rst.length;
stats.recordSize += field_5_ext_rst.length;
out.writeContinue();
}
}
}
@ -801,9 +496,9 @@ public final class UnicodeString implements Comparable {
if (result != 0)
return result;
//Ok string appears to be equal but now lets compare formatting runs
//OK string appears to be equal but now lets compare formatting runs
if ((field_4_format_runs == null) && (str.field_4_format_runs == null))
//Strings are equal, and there are no formtting runs.
//Strings are equal, and there are no formatting runs.
return 0;
if ((field_4_format_runs == null) && (str.field_4_format_runs != null))
@ -850,12 +545,12 @@ public final class UnicodeString implements Comparable {
return 0;
}
public boolean isRichText()
private boolean isRichText()
{
return richText.isSet(getOptionFlags());
}
public boolean isExtendedText()
private boolean isExtendedText()
{
return extBit.isSet(getOptionFlags());
}
@ -881,6 +576,4 @@ public final class UnicodeString implements Comparable {
return str;
}
}

View File

@ -0,0 +1,69 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.cont;
import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianOutput;
/**
* Common superclass of all records that can produce {@link ContinueRecord}s while being serialized.
*
* @author Josh Micich
*/
public abstract class ContinuableRecord extends Record {
protected ContinuableRecord() {
// no fields to initialise
}
/**
* Serializes this record's content to the supplied data output.<br/>
* The standard BIFF header (ushort sid, ushort size) has been handled by the superclass, so
* only BIFF data should be written by this method. Simple data types can be written with the
* standard {@link LittleEndianOutput} methods. Methods from {@link ContinuableRecordOutput}
* can be used to serialize strings (with {@link ContinueRecord}s being written as required).
* If necessary, implementors can explicitly start {@link ContinueRecord}s (regardless of the
* amount of remaining space).
*
* @param out a data output stream
*/
protected abstract void serialize(ContinuableRecordOutput out);
/**
* @return four less than the total length of the encoded record(s)
* (in the case when no {@link ContinueRecord} is needed, this is the
* same ushort value that gets encoded after the record sid
*/
protected final int getDataSize() {
ContinuableRecordOutput out = ContinuableRecordOutput.createForCountingOnly();
serialize(out);
out.terminate();
return out.getTotalSize() - 4;
}
public final int serialize(int offset, byte[] data) {
LittleEndianOutput leo = new LittleEndianByteArrayOutputStream(data, offset);
ContinuableRecordOutput out = new ContinuableRecordOutput(leo, getSid());
serialize(out);
out.terminate();
return out.getTotalSize();
}
}

View File

@ -0,0 +1,257 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.cont;
import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.util.DelayableLittleEndianOutput;
import org.apache.poi.util.LittleEndianOutput;
import org.apache.poi.util.StringUtil;
/**
* An augmented {@link LittleEndianOutput} used for serialization of {@link ContinuableRecord}s.
* This class keeps track of how much remaining space is available in the current BIFF record and
* can start new {@link ContinueRecord}s as required.
*
* @author Josh Micich
*/
public final class ContinuableRecordOutput implements LittleEndianOutput {
private final LittleEndianOutput _out;
private UnknownLengthRecordOutput _ulrOutput;
private int _totalPreviousRecordsSize;
ContinuableRecordOutput(LittleEndianOutput out, int sid) {
_ulrOutput = new UnknownLengthRecordOutput(out, sid);
_out = out;
_totalPreviousRecordsSize = 0;
}
public static ContinuableRecordOutput createForCountingOnly() {
return new ContinuableRecordOutput(NOPOutput, -777); // fake sid
}
/**
* @return total number of bytes written so far (including all BIFF headers)
*/
public int getTotalSize() {
return _totalPreviousRecordsSize + _ulrOutput.getTotalSize();
}
/**
* Terminates the last record (also updates its 'ushort size' field)
*/
void terminate() {
_ulrOutput.terminate();
}
/**
* @return number of remaining bytes of space in current record
*/
public int getAvailableSpace() {
return _ulrOutput.getAvailableSpace();
}
/**
* Terminates the current record and starts a new {@link ContinueRecord} (regardless
* of how much space is still available in the current record).
*/
public void writeContinue() {
_ulrOutput.terminate();
_totalPreviousRecordsSize += _ulrOutput.getTotalSize();
_ulrOutput = new UnknownLengthRecordOutput(_out, ContinueRecord.sid);
}
public void writeContinueIfRequired(int requiredContinuousSize) {
if (_ulrOutput.getAvailableSpace() < requiredContinuousSize) {
writeContinue();
}
}
/**
* Writes the 'optionFlags' byte and encoded character data of a unicode string. This includes:
* <ul>
* <li>byte optionFlags</li>
* <li>encoded character data (in "ISO-8859-1" or "UTF-16LE" encoding)</li>
* </ul>
*
* Notes:
* <ul>
* <li>The value of the 'is16bitEncoded' flag is determined by the actual character data
* of <tt>text</tt></li>
* <li>The string options flag is never separated (by a {@link ContinueRecord}) from the
* first chunk of character data it refers to.</li>
* <li>The 'ushort length' field is assumed to have been explicitly written earlier. Hence,
* there may be an intervening {@link ContinueRecord}</li>
* </ul>
*/
public void writeStringData(String text) {
boolean is16bitEncoded = StringUtil.hasMultibyte(text);
// calculate total size of the header and first encoded char
int keepTogetherSize = 1 + 1; // ushort len, at least one character byte
int optionFlags = 0x00;
if (is16bitEncoded) {
optionFlags |= 0x01;
keepTogetherSize += 1; // one extra byte for first char
}
writeContinueIfRequired(keepTogetherSize);
writeByte(optionFlags);
writeCharacterData(text, is16bitEncoded);
}
/**
* Writes a unicode string complete with header and character data. This includes:
* <ul>
* <li>ushort length</li>
* <li>byte optionFlags</li>
* <li>ushort numberOfRichTextRuns (optional)</li>
* <li>ushort extendedDataSize (optional)</li>
* <li>encoded character data (in "ISO-8859-1" or "UTF-16LE" encoding)</li>
* </ul>
*
* The following bits of the 'optionFlags' byte will be set as appropriate:
* <table border='1'>
* <tr><th>Mask</th><th>Description</th></tr>
* <tr><td>0x01</td><td>is16bitEncoded</td></tr>
* <tr><td>0x04</td><td>hasExtendedData</td></tr>
* <tr><td>0x08</td><td>isRichText</td></tr>
* </table>
* Notes:
* <ul>
* <li>The value of the 'is16bitEncoded' flag is determined by the actual character data
* of <tt>text</tt></li>
* <li>The string header fields are never separated (by a {@link ContinueRecord}) from the
* first chunk of character data (i.e. the first character is always encoded in the same
* record as the string header).</li>
* </ul>
*/
public void writeString(String text, int numberOfRichTextRuns, int extendedDataSize) {
boolean is16bitEncoded = StringUtil.hasMultibyte(text);
// calculate total size of the header and first encoded char
int keepTogetherSize = 2 + 1 + 1; // ushort len, byte optionFlags, at least one character byte
int optionFlags = 0x00;
if (is16bitEncoded) {
optionFlags |= 0x01;
keepTogetherSize += 1; // one extra byte for first char
}
if (numberOfRichTextRuns > 0) {
optionFlags |= 0x08;
keepTogetherSize += 2;
}
if (extendedDataSize > 0) {
optionFlags |= 0x04;
keepTogetherSize += 4;
}
writeContinueIfRequired(keepTogetherSize);
writeShort(text.length());
writeByte(optionFlags);
if (numberOfRichTextRuns > 0) {
writeShort(numberOfRichTextRuns);
}
if (extendedDataSize > 0) {
writeInt(extendedDataSize);
}
writeCharacterData(text, is16bitEncoded);
}
private void writeCharacterData(String text, boolean is16bitEncoded) {
int nChars = text.length();
int i=0;
if (is16bitEncoded) {
while(true) {
int nWritableChars = Math.min(nChars-i, _ulrOutput.getAvailableSpace() / 2);
for ( ; nWritableChars > 0; nWritableChars--) {
_ulrOutput.writeShort(text.charAt(i++));
}
if (i >= nChars) {
break;
}
writeContinue();
writeByte(0x01);
}
} else {
while(true) {
int nWritableChars = Math.min(nChars-i, _ulrOutput.getAvailableSpace() / 1);
for ( ; nWritableChars > 0; nWritableChars--) {
_ulrOutput.writeByte(text.charAt(i++));
}
if (i >= nChars) {
break;
}
writeContinue();
writeByte(0x00);
}
}
}
public void write(byte[] b) {
writeContinueIfRequired(b.length);
_ulrOutput.write(b);
}
public void write(byte[] b, int offset, int len) {
writeContinueIfRequired(len);
_ulrOutput.write(b, offset, len);
}
public void writeByte(int v) {
writeContinueIfRequired(1);
_ulrOutput.writeByte(v);
}
public void writeDouble(double v) {
writeContinueIfRequired(8);
_ulrOutput.writeDouble(v);
}
public void writeInt(int v) {
writeContinueIfRequired(4);
_ulrOutput.writeInt(v);
}
public void writeLong(long v) {
writeContinueIfRequired(8);
_ulrOutput.writeLong(v);
}
public void writeShort(int v) {
writeContinueIfRequired(2);
_ulrOutput.writeShort(v);
}
/**
* Allows optimised usage of {@link ContinuableRecordOutput} for sizing purposes only.
*/
private static final LittleEndianOutput NOPOutput = new DelayableLittleEndianOutput() {
public LittleEndianOutput createDelayedOutput(int size) {
return this;
}
public void write(byte[] b) {
// does nothing
}
public void write(byte[] b, int offset, int len) {
// does nothing
}
public void writeByte(int v) {
// does nothing
}
public void writeDouble(double v) {
// does nothing
}
public void writeInt(int v) {
// does nothing
}
public void writeLong(long v) {
// does nothing
}
public void writeShort(int v) {
// does nothing
}
};
}

View File

@ -0,0 +1,114 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.cont;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.util.DelayableLittleEndianOutput;
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianOutput;
/**
* Allows the writing of BIFF records when the 'ushort size' header field is not known in advance.
* When the client is finished writing data, it calls {@link #terminate()}, at which point this
* class updates the 'ushort size' with its final value.
*
* @author Josh Micich
*/
final class UnknownLengthRecordOutput implements LittleEndianOutput {
private static final int MAX_DATA_SIZE = RecordInputStream.MAX_RECORD_DATA_SIZE;
private final LittleEndianOutput _originalOut;
/** for writing the 'ushort size' field once its value is known */
private final LittleEndianOutput _dataSizeOutput;
private final byte[] _byteBuffer;
private LittleEndianOutput _out;
private int _size;
public UnknownLengthRecordOutput(LittleEndianOutput out, int sid) {
_originalOut = out;
out.writeShort(sid);
if (out instanceof DelayableLittleEndianOutput) {
// optimisation
DelayableLittleEndianOutput dleo = (DelayableLittleEndianOutput) out;
_dataSizeOutput = dleo.createDelayedOutput(2);
_byteBuffer = null;
_out = out;
} else {
// otherwise temporarily write all subsequent data to a buffer
_dataSizeOutput = out;
_byteBuffer = new byte[RecordInputStream.MAX_RECORD_DATA_SIZE];
_out = new LittleEndianByteArrayOutputStream(_byteBuffer, 0);
}
}
/**
* includes 4 byte header
*/
public int getTotalSize() {
return 4 + _size;
}
public int getAvailableSpace() {
if (_out == null) {
throw new IllegalStateException("Record already terminated");
}
return MAX_DATA_SIZE - _size;
}
/**
* Finishes writing the current record and updates 'ushort size' field.<br/>
* After this method is called, only {@link #getTotalSize()} may be called.
*/
public void terminate() {
if (_out == null) {
throw new IllegalStateException("Record already terminated");
}
_dataSizeOutput.writeShort(_size);
if (_byteBuffer != null) {
_originalOut.write(_byteBuffer, 0, _size);
_out = null;
return;
}
_out = null;
}
public void write(byte[] b) {
_out.write(b);
_size += b.length;
}
public void write(byte[] b, int offset, int len) {
_out.write(b, offset, len);
_size += len;
}
public void writeByte(int v) {
_out.writeByte(v);
_size += 1;
}
public void writeDouble(double v) {
_out.writeDouble(v);
_size += 8;
}
public void writeInt(int v) {
_out.writeInt(v);
_size += 4;
}
public void writeLong(long v) {
_out.writeLong(v);
_size += 8;
}
public void writeShort(int v) {
_out.writeShort(v);
_size += 2;
}
}

View File

@ -0,0 +1,34 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.util;
/**
* Implementors of this interface allow client code to 'delay' writing to a certain section of a
* data output stream.<br/>
* A typical application is for writing BIFF records when the size is not known until well after
* the header has been written. The client code can call {@link #createDelayedOutput(int)}
* to reserve two bytes of the output for the 'ushort size' header field. The delayed output can
* be written at any stage.
*
* @author Josh Micich
*/
public interface DelayableLittleEndianOutput extends LittleEndianOutput {
/**
* Creates an output stream intended for outputting a sequence of <tt>size</tt> bytes.
*/
LittleEndianOutput createDelayedOutput(int size);
}

View File

@ -24,7 +24,7 @@ package org.apache.poi.util;
*
* @author Josh Micich
*/
public final class LittleEndianByteArrayOutputStream implements LittleEndianOutput {
public final class LittleEndianByteArrayOutputStream implements LittleEndianOutput, DelayableLittleEndianOutput {
private final byte[] _buf;
private final int _endIndex;
private int _writeIndex;
@ -89,4 +89,10 @@ public final class LittleEndianByteArrayOutputStream implements LittleEndianOutp
public int getWriteIndex() {
return _writeIndex;
}
public LittleEndianOutput createDelayedOutput(int size) {
checkPosition(size);
LittleEndianOutput result = new LittleEndianByteArrayOutputStream(_buf, _writeIndex, _writeIndex+size);
_writeIndex += size;
return result;
}
}

View File

@ -48,7 +48,6 @@ public final class TestRecordFactory extends TestCase {
byte[] data = {
0, 6, 5, 0, -2, 28, -51, 7, -55, 64, 0, 0, 6, 1, 0, 0
};
short size = 16;
Record[] record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data));
assertEquals(BOFRecord.class.getName(),
@ -64,7 +63,6 @@ public final class TestRecordFactory extends TestCase {
assertEquals(5, bofRecord.getType());
assertEquals(1536, bofRecord.getVersion());
recType = MMSRecord.sid;
size = 2;
data = new byte[]
{
0, 0
@ -93,7 +91,6 @@ public final class TestRecordFactory extends TestCase {
byte[] data = {
0, 0, 0, 0, 21, 0, 0, 0, 0, 0
};
short size = 10;
Record[] record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data));
assertEquals(NumberRecord.class.getName(),
@ -154,34 +151,34 @@ public final class TestRecordFactory extends TestCase {
*/
public void testMixedContinue() throws Exception {
/**
* Taken from a real test sample file 39512.xls. See Bug 39512 for details.
* Adapted from a real test sample file 39512.xls (Offset 0x4854).
* See Bug 39512 for details.
*/
String dump =
//OBJ
"5D, 00, 48, 00, 15, 00, 12, 00, 0C, 00, 3C, 00, 11, 00, A0, 2E, 03, 01, CC, 42, " +
"CF, 00, 00, 00, 00, 00, 0A, 00, 0C, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, " +
"03, 00, 0B, 00, 06, 00, 28, 01, 03, 01, 00, 00, 12, 00, 08, 00, 00, 00, 00, 00, " +
"00, 00, 03, 00, 11, 00, 04, 00, 3D, 00, 00, 00, 00, 00, 00, 00, " +
"5D 00 48 00 15 00 12 00 0C 00 3C 00 11 00 A0 2E 03 01 CC 42 " +
"CF 00 00 00 00 00 0A 00 0C 00 00 00 00 00 00 00 00 00 00 00 " +
"03 00 0B 00 06 00 28 01 03 01 00 00 12 00 08 00 00 00 00 00 " +
"00 00 03 00 11 00 04 00 3D 00 00 00 00 00 00 00 " +
//MSODRAWING
"EC, 00, 08, 00, 00, 00, 0D, F0, 00, 00, 00, 00, " +
//TXO
"B6, 01, 12, 00, 22, 02, 00, 00, 00, 00, 00, 00, 00, 00, 10, 00, 10, 00, 00, 00, " +
"00, 00, 3C, 00, 21, 00, 01, 4F, 00, 70, 00, 74, 00, 69, 00, 6F, 00, 6E, 00, 20, " +
"00, 42, 00, 75, 00, 74, 00, 74, 00, 6F, 00, 6E, 00, 20, 00, 33, 00, 39, 00, 3C, " +
"00, 10, 00, 00, 00, 05, 00, 00, 00, 00, 00, 10, 00, 00, 00, 00, 00, 00, 00, " +
//CONTINUE
"3C, 00, 7E, 00, 0F, 00, 04, F0, 7E, 00, 00, 00, 92, 0C, 0A, F0, 08, 00, 00, 00, " +
"3D, 04, 00, 00, 00, 0A, 00, 00, A3, 00, 0B, F0, 3C, 00, 00, 00, 7F, 00, 00, 01, " +
"00, 01, 80, 00, 8C, 01, 03, 01, 85, 00, 01, 00, 00, 00, 8B, 00, 02, 00, 00, 00, " +
"BF, 00, 08, 00, 1A, 00, 7F, 01, 29, 00, 29, 00, 81, 01, 41, 00, 00, 08, BF, 01, " +
"00, 00, 10, 00, C0, 01, 40, 00, 00, 08, FF, 01, 00, 00, 08, 00, 00, 00, 10, F0, " +
"12, 00, 00, 00, 02, 00, 02, 00, A0, 03, 18, 00, B5, 00, 04, 00, 30, 02, 1A, 00, " +
"00, 00, 00, 00, 11, F0, 00, 00, 00, 00, " +
"EC 00 08 00 00 00 0D F0 00 00 00 00 " +
//TXO (and 2 trailing CONTINUE records)
"B6 01 12 00 22 02 00 00 00 00 00 00 00 00 10 00 10 00 00 00 00 00 " +
"3C 00 11 00 00 4F 70 74 69 6F 6E 20 42 75 74 74 6F 6E 20 33 39 " +
"3C 00 10 00 00 00 05 00 00 00 00 00 10 00 00 00 00 00 00 00 " +
// another CONTINUE
"3C 00 7E 00 0F 00 04 F0 7E 00 00 00 92 0C 0A F0 08 00 00 00 " +
"3D 04 00 00 00 0A 00 00 A3 00 0B F0 3C 00 00 00 7F 00 00 01 " +
"00 01 80 00 8C 01 03 01 85 00 01 00 00 00 8B 00 02 00 00 00 " +
"BF 00 08 00 1A 00 7F 01 29 00 29 00 81 01 41 00 00 08 BF 01 " +
"00 00 10 00 C0 01 40 00 00 08 FF 01 00 00 08 00 00 00 10 F0 " +
"12 00 00 00 02 00 02 00 A0 03 18 00 B5 00 04 00 30 02 1A 00 " +
"00 00 00 00 11 F0 00 00 00 00 " +
//OBJ
"5D, 00, 48, 00, 15, 00, 12, 00, 0C, 00, 3D, 00, 11, 00, 8C, 01, 03, 01, C8, 59, CF, 00, 00, " +
"00, 00, 00, 0A, 00, 0C, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 03, 00, 0B, 00, 06, 00, " +
"7C, 16, 03, 01, 00, 00, 12, 00, 08, 00, 00, 00, 00, 00, 00, 00, 03, 00, 11, 00, 04, 00, 01, " +
"00, 00, 00, 00, 00, 00, 00";
"5D 00 48 00 15 00 12 00 0C 00 3D 00 11 00 8C 01 03 01 C8 59 CF 00 00 " +
"00 00 00 0A 00 0C 00 00 00 00 00 00 00 00 00 00 00 03 00 0B 00 06 00 " +
"7C 16 03 01 00 00 12 00 08 00 00 00 00 00 00 00 03 00 11 00 04 00 01 " +
"00 00 00 00 00 00 00";
byte[] data = HexRead.readFromString(dump);
List records = RecordFactory.createRecords(new ByteArrayInputStream(data));

View File

@ -19,6 +19,7 @@ package org.apache.poi.hssf.record;
import junit.framework.TestCase;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.IntMapper;
/**
@ -35,8 +36,10 @@ public final class TestSSTRecordSizeCalculator extends TestCase {
private void confirmSize(int expectedSize) {
SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(strings);
assertEquals(expectedSize, calculator.getRecordSize());
ContinuableRecordOutput cro = ContinuableRecordOutput.createForCountingOnly();
SSTSerializer ss = new SSTSerializer(strings, 0, 0);
ss.serialize(cro);
assertEquals(expectedSize, cro.getTotalSize());
}
public void testBasic() {

View File

@ -18,6 +18,12 @@
package org.apache.poi.hssf.record;
import org.apache.poi.util.HexRead;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianByteArrayInputStream;
import org.apache.poi.util.LittleEndianInput;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
/**
@ -28,12 +34,12 @@ import junit.framework.TestCase;
* @author Glen Stampoultzis (glens at apache.org)
*/
public final class TestStringRecord extends TestCase {
byte[] data = new byte[] {
(byte)0x0B,(byte)0x00, // length
(byte)0x00, // option
private static final byte[] data = HexRead.readFromString(
"0B 00 " + // length
"00 " + // option
// string
(byte)0x46,(byte)0x61,(byte)0x68,(byte)0x72,(byte)0x7A,(byte)0x65,(byte)0x75,(byte)0x67,(byte)0x74,(byte)0x79,(byte)0x70
};
"46 61 68 72 7A 65 75 67 74 79 70"
);
public void testLoad() {
@ -43,8 +49,7 @@ public final class TestStringRecord extends TestCase {
assertEquals( 18, record.getRecordSize() );
}
public void testStore()
{
public void testStore() {
StringRecord record = new StringRecord();
record.setString("Fahrzeugtyp");
@ -53,4 +58,42 @@ public final class TestStringRecord extends TestCase {
for (int i = 0; i < data.length; i++)
assertEquals("At offset " + i, data[i], recordBytes[i+4]);
}
public void testContinue() {
int MAX_BIFF_DATA = RecordInputStream.MAX_RECORD_DATA_SIZE;
int TEXT_LEN = MAX_BIFF_DATA + 1000; // deliberately over-size
String textChunk = "ABCDEGGHIJKLMNOP"; // 16 chars
StringBuffer sb = new StringBuffer(16384);
while (sb.length() < TEXT_LEN) {
sb.append(textChunk);
}
sb.setLength(TEXT_LEN);
StringRecord sr = new StringRecord();
sr.setString(sb.toString());
byte[] ser = sr.serialize();
assertEquals(StringRecord.sid, LittleEndian.getUShort(ser, 0));
if (LittleEndian.getUShort(ser, 2) > MAX_BIFF_DATA) {
throw new AssertionFailedError(
"StringRecord should have been split with a continue record");
}
// Confirm expected size of first record, and ushort strLen.
assertEquals(MAX_BIFF_DATA, LittleEndian.getUShort(ser, 2));
assertEquals(TEXT_LEN, LittleEndian.getUShort(ser, 4));
// Confirm first few bytes of ContinueRecord
LittleEndianInput crIn = new LittleEndianByteArrayInputStream(ser, (MAX_BIFF_DATA + 4));
int nCharsInFirstRec = MAX_BIFF_DATA - (2 + 1); // strLen, optionFlags
int nCharsInSecondRec = TEXT_LEN - nCharsInFirstRec;
assertEquals(ContinueRecord.sid, crIn.readUShort());
assertEquals(1 + nCharsInSecondRec, crIn.readUShort());
assertEquals(0, crIn.readUByte());
assertEquals('N', crIn.readUByte());
assertEquals('O', crIn.readUByte());
// re-read and make sure string value is the same
RecordInputStream in = TestcaseRecordInputStream.create(ser);
StringRecord sr2 = new StringRecord(in);
assertEquals(sb.toString(), sr2.getString());
}
}

View File

@ -44,9 +44,9 @@ public final class TestTextObjectBaseRecord extends TestCase {
"00 00" +
"00 00 " +
"3C 00 " + // ContinueRecord.sid
"05 00 " + // size 5
"01 " + // unicode uncompressed
"41 00 42 00 " + // 'AB'
"03 00 " + // size 3
"00 " + // unicode compressed
"41 42 " + // 'AB'
"3C 00 " + // ContinueRecord.sid
"10 00 " + // size 16
"00 00 18 00 00 00 00 00 " +
@ -63,7 +63,7 @@ public final class TestTextObjectBaseRecord extends TestCase {
assertEquals(true, record.isTextLocked());
assertEquals(TextObjectRecord.TEXT_ORIENTATION_ROT_RIGHT, record.getTextOrientation());
assertEquals(51, record.getRecordSize() );
assertEquals(49, record.getRecordSize() );
}
public void testStore()

View File

@ -41,10 +41,8 @@ public final class TestTextObjectRecord extends TestCase {
"12 02 00 00 00 00 00 00" +
"00 00 0D 00 08 00 00 00" +
"00 00 " +
"3C 00 1B 00 " +
"01 48 00 65 00 6C 00 6C 00 6F 00 " +
"2C 00 20 00 57 00 6F 00 72 00 6C " +
"00 64 00 21 00 " +
"3C 00 0E 00 " +
"00 48 65 6C 6C 6F 2C 20 57 6F 72 6C 64 21 " +
"3C 00 08 " +
"00 0D 00 00 00 00 00 00 00"
);
@ -169,9 +167,9 @@ public final class TestTextObjectRecord extends TestCase {
"13 " + // ??
"3C 00 " + // ContinueRecord.sid
"05 00 " + // size 5
"01 " + // unicode uncompressed
"41 00 42 00 " + // 'AB'
"03 00 " + // size 3
"00 " + // unicode compressed
"41 42 " + // 'AB'
"3C 00 " + // ContinueRecord.sid
"10 00 " + // size 16
"00 00 18 00 00 00 00 00 " +

View File

@ -19,8 +19,11 @@ package org.apache.poi.hssf.record;
import junit.framework.TestCase;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
/**
* Tests that records size calculates correctly.
* Tests that {@link UnicodeString} record size calculates correctly. The record size
* is used when serializing {@link SSTRecord}s.
*
* @author Jason Height (jheight at apache.org)
*/
@ -33,11 +36,23 @@ public final class TestUnicodeString extends TestCase {
private static void confirmSize(int expectedSize, UnicodeString s) {
confirmSize(expectedSize, s, 0);
}
/**
* Note - a value of zero for <tt>amountUsedInCurrentRecord</tt> would only ever occur just
* after a {@link ContinueRecord} had been started. In the initial {@link SSTRecord} this
* value starts at 8 (for the first {@link UnicodeString} written). In general, it can be
* any value between 0 and {@link #MAX_DATA_SIZE}
*/
private static void confirmSize(int expectedSize, UnicodeString s, int amountUsedInCurrentRecord) {
UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats();
stats.remainingSize = MAX_DATA_SIZE-amountUsedInCurrentRecord;
s.getRecordSize(stats);
assertEquals(expectedSize, stats.recordSize);
ContinuableRecordOutput out = ContinuableRecordOutput.createForCountingOnly();
out.writeContinue();
for(int i=amountUsedInCurrentRecord; i>0; i--) {
out.writeByte(0);
}
int size0 = out.getTotalSize();
s.serialize(out);
int size1 = out.getTotalSize();
int actualSize = size1-size0;
assertEquals(expectedSize, actualSize);
}
public void testSmallStringSize() {