2005-08-18 03:06:44 -04:00
|
|
|
/* ====================================================================
|
2006-12-22 14:18:16 -05:00
|
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
|
|
this work for additional information regarding copyright ownership.
|
|
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
(the "License"); you may not use this file except in compliance with
|
|
|
|
the License. You may obtain a copy of the License at
|
2005-08-18 03:06:44 -04:00
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
==================================================================== */
|
|
|
|
|
|
|
|
package org.apache.poi.hssf.record;
|
|
|
|
|
2008-10-24 19:13:44 -04:00
|
|
|
import java.io.ByteArrayOutputStream;
|
2005-08-18 03:06:44 -04:00
|
|
|
import java.io.InputStream;
|
2008-10-24 19:13:44 -04:00
|
|
|
|
|
|
|
import org.apache.poi.util.LittleEndian;
|
|
|
|
import org.apache.poi.util.LittleEndianInput;
|
|
|
|
import org.apache.poi.util.LittleEndianInputStream;
|
2005-08-18 03:06:44 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Title: Record Input Stream<P>
|
|
|
|
* Description: Wraps a stream and provides helper methods for the construction of records.<P>
|
|
|
|
*
|
|
|
|
* @author Jason Height (jheight @ apache dot org)
|
|
|
|
*/
|
2008-10-23 15:08:42 -04:00
|
|
|
public final class RecordInputStream extends InputStream implements LittleEndianInput {
|
2008-10-10 18:59:14 -04:00
|
|
|
/** Maximum size of a single record (minus the 4 byte header) without a continue*/
|
|
|
|
public final static short MAX_RECORD_DATA_SIZE = 8224;
|
|
|
|
private static final int INVALID_SID_VALUE = -1;
|
2008-10-24 21:02:37 -04:00
|
|
|
/**
|
|
|
|
* When {@link #_currentDataLength} has this value, it means that the previous BIFF record is
|
|
|
|
* finished, the next sid has been properly read, but the data size field has not been read yet.
|
|
|
|
*/
|
2008-10-24 19:13:44 -04:00
|
|
|
private static final int DATA_LEN_NEEDS_TO_BE_READ = -1;
|
|
|
|
private static final byte[] EMPTY_BYTE_ARRAY = { };
|
|
|
|
|
2008-10-24 21:02:37 -04:00
|
|
|
/** {@link LittleEndianInput} facet of the wrapped {@link InputStream} */
|
2008-10-24 19:13:44 -04:00
|
|
|
private final LittleEndianInput _le;
|
2008-10-24 21:02:37 -04:00
|
|
|
/** the record identifier of the BIFF record currently being read */
|
|
|
|
private int _currentSid;
|
|
|
|
/**
|
|
|
|
* Length of the data section of the current BIFF record (always 4 less than the total record size).
|
|
|
|
* When uninitialised, this field is set to {@link #DATA_LEN_NEEDS_TO_BE_READ}.
|
|
|
|
*/
|
2008-10-24 19:13:44 -04:00
|
|
|
private int _currentDataLength;
|
2008-10-24 21:02:37 -04:00
|
|
|
/**
|
|
|
|
* The BIFF record identifier for the next record is read when just as the current record
|
|
|
|
* is finished.
|
|
|
|
* This field is only really valid during the time that ({@link #_currentDataLength} ==
|
|
|
|
* {@link #DATA_LEN_NEEDS_TO_BE_READ}). At most other times its value is not really the
|
|
|
|
* 'sid of the next record'. Wwhile mid-record, this field coincidentally holds the sid
|
|
|
|
* of the current record.
|
|
|
|
*/
|
|
|
|
private int _nextSid;
|
|
|
|
/**
|
|
|
|
* index within the data section of the current BIFF record
|
|
|
|
*/
|
|
|
|
private int _currentDataOffset;
|
2008-10-24 19:13:44 -04:00
|
|
|
|
|
|
|
public RecordInputStream(InputStream in) throws RecordFormatException {
|
|
|
|
if (in instanceof LittleEndianInput) {
|
|
|
|
// accessing directly is an optimisation
|
|
|
|
_le = (LittleEndianInput) in;
|
|
|
|
} else {
|
|
|
|
// less optimal, but should work OK just the same. Often occurs in junit tests.
|
|
|
|
_le = new LittleEndianInputStream(in);
|
|
|
|
}
|
2008-10-24 21:02:37 -04:00
|
|
|
_nextSid = readNextSid();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @returns the number of bytes available in the current BIFF record
|
|
|
|
* @see #remaining()
|
|
|
|
*/
|
|
|
|
public int available() {
|
|
|
|
return remaining();
|
2008-10-24 19:13:44 -04:00
|
|
|
}
|
2008-10-10 18:59:14 -04:00
|
|
|
|
2008-10-05 00:43:48 -04:00
|
|
|
public int read() {
|
|
|
|
checkRecordPosition(LittleEndian.BYTE_SIZE);
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataOffset += LittleEndian.BYTE_SIZE;
|
2008-10-24 19:13:44 -04:00
|
|
|
return _le.readUByte();
|
|
|
|
}
|
|
|
|
public int read(byte[] b, int off, int len) {
|
|
|
|
int limit = Math.min(len, remaining());
|
|
|
|
if (limit == 0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
readFully(b, off,limit);
|
|
|
|
return limit;
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
|
|
|
|
2008-10-24 21:02:37 -04:00
|
|
|
public short getSid() {
|
|
|
|
return (short) _currentSid;
|
|
|
|
}
|
2005-08-18 03:06:44 -04:00
|
|
|
|
2008-10-24 19:13:44 -04:00
|
|
|
/**
|
|
|
|
* Note - this method is expected to be called only when completed reading the current BIFF record.
|
|
|
|
* Calling this before reaching the end of the current record will cause all remaining data to be
|
|
|
|
* discarded
|
|
|
|
*/
|
|
|
|
public boolean hasNextRecord() {
|
2008-10-24 21:02:37 -04:00
|
|
|
if (_currentDataLength != -1 && _currentDataLength != _currentDataOffset) {
|
|
|
|
System.out.println("WARN. Unread "+remaining()+" bytes of record 0x"+Integer.toHexString(_currentSid));
|
2008-10-24 19:13:44 -04:00
|
|
|
// discard unread data
|
2008-10-24 21:02:37 -04:00
|
|
|
while (_currentDataOffset < _currentDataLength) {
|
2008-10-24 19:13:44 -04:00
|
|
|
readByte();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ) {
|
2008-10-24 21:02:37 -04:00
|
|
|
_nextSid = readNextSid();
|
2008-10-24 19:13:44 -04:00
|
|
|
}
|
2008-10-24 21:02:37 -04:00
|
|
|
return _nextSid != INVALID_SID_VALUE;
|
2008-10-24 19:13:44 -04:00
|
|
|
}
|
2005-08-18 03:06:44 -04:00
|
|
|
|
2008-10-24 19:13:44 -04:00
|
|
|
/**
|
|
|
|
*
|
|
|
|
* @return the sid of the next record or {@link #INVALID_SID_VALUE} if at end of stream
|
|
|
|
*/
|
|
|
|
private int readNextSid() {
|
2008-10-24 21:02:37 -04:00
|
|
|
int nAvailable = _le.available();
|
2008-10-24 19:13:44 -04:00
|
|
|
if (nAvailable < EOFRecord.ENCODED_SIZE) {
|
|
|
|
if (nAvailable > 0) {
|
|
|
|
// some scrap left over?
|
|
|
|
// ex45582-22397.xls has one extra byte after the last record
|
|
|
|
// Excel reads that file OK
|
|
|
|
}
|
|
|
|
return INVALID_SID_VALUE;
|
|
|
|
}
|
|
|
|
int result = _le.readUShort();
|
|
|
|
if (result == INVALID_SID_VALUE) {
|
|
|
|
throw new RecordFormatException("Found invalid sid (" + result + ")");
|
|
|
|
}
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
|
2008-10-24 19:13:44 -04:00
|
|
|
return result;
|
|
|
|
}
|
2008-10-05 00:43:48 -04:00
|
|
|
|
2008-10-24 19:13:44 -04:00
|
|
|
/** Moves to the next record in the stream.
|
|
|
|
*
|
|
|
|
* <i>Note: The auto continue flag is reset to true</i>
|
|
|
|
*/
|
|
|
|
public void nextRecord() throws RecordFormatException {
|
2008-10-24 21:02:37 -04:00
|
|
|
if (_nextSid == INVALID_SID_VALUE) {
|
2008-10-24 19:13:44 -04:00
|
|
|
throw new IllegalStateException("EOF - next record not available");
|
|
|
|
}
|
2008-10-24 21:02:37 -04:00
|
|
|
if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ) {
|
|
|
|
throw new IllegalStateException("Cannot call nextRecord() without checking hasNextRecord() first");
|
|
|
|
}
|
|
|
|
_currentSid = _nextSid;
|
|
|
|
_currentDataOffset = 0;
|
2008-10-24 19:13:44 -04:00
|
|
|
_currentDataLength = _le.readUShort();
|
|
|
|
if (_currentDataLength > MAX_RECORD_DATA_SIZE) {
|
|
|
|
throw new RecordFormatException("The content of an excel record cannot exceed "
|
|
|
|
+ MAX_RECORD_DATA_SIZE + " bytes");
|
|
|
|
}
|
|
|
|
}
|
2008-10-05 00:43:48 -04:00
|
|
|
|
|
|
|
private void checkRecordPosition(int requiredByteCount) {
|
2005-08-18 03:06:44 -04:00
|
|
|
|
2008-10-24 21:02:37 -04:00
|
|
|
int nAvailable = remaining();
|
|
|
|
if (nAvailable >= requiredByteCount) {
|
|
|
|
// all OK
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (nAvailable == 0 && isContinueNext()) {
|
|
|
|
nextRecord();
|
|
|
|
return;
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
2008-10-24 21:02:37 -04:00
|
|
|
throw new RecordFormatException("Not enough data (" + nAvailable
|
|
|
|
+ ") to read requested (" + requiredByteCount +") bytes");
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
2005-08-18 03:06:44 -04:00
|
|
|
|
2008-10-05 00:43:48 -04:00
|
|
|
/**
|
|
|
|
* Reads an 8 bit, signed value
|
|
|
|
*/
|
|
|
|
public byte readByte() {
|
|
|
|
checkRecordPosition(LittleEndian.BYTE_SIZE);
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataOffset += LittleEndian.BYTE_SIZE;
|
2008-10-24 19:13:44 -04:00
|
|
|
return _le.readByte();
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
2005-08-18 03:06:44 -04:00
|
|
|
|
2008-10-05 00:43:48 -04:00
|
|
|
/**
|
|
|
|
* Reads a 16 bit, signed value
|
|
|
|
*/
|
|
|
|
public short readShort() {
|
|
|
|
checkRecordPosition(LittleEndian.SHORT_SIZE);
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataOffset += LittleEndian.SHORT_SIZE;
|
2008-10-24 19:13:44 -04:00
|
|
|
return _le.readShort();
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
public int readInt() {
|
|
|
|
checkRecordPosition(LittleEndian.INT_SIZE);
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataOffset += LittleEndian.INT_SIZE;
|
2008-10-24 19:13:44 -04:00
|
|
|
return _le.readInt();
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
public long readLong() {
|
|
|
|
checkRecordPosition(LittleEndian.LONG_SIZE);
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataOffset += LittleEndian.LONG_SIZE;
|
2008-10-24 19:13:44 -04:00
|
|
|
return _le.readLong();
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Reads an 8 bit, unsigned value
|
|
|
|
*/
|
2008-10-23 15:08:42 -04:00
|
|
|
public int readUByte() {
|
|
|
|
return readByte() & 0x00FF;
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Reads a 16 bit, unsigned value.
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
public int readUShort() {
|
|
|
|
checkRecordPosition(LittleEndian.SHORT_SIZE);
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataOffset += LittleEndian.SHORT_SIZE;
|
2008-10-24 19:13:44 -04:00
|
|
|
return _le.readUShort();
|
2008-10-05 00:43:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
public double readDouble() {
|
|
|
|
checkRecordPosition(LittleEndian.DOUBLE_SIZE);
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataOffset += LittleEndian.DOUBLE_SIZE;
|
2008-10-24 19:13:44 -04:00
|
|
|
long valueLongBits = _le.readLong();
|
2008-10-05 00:43:48 -04:00
|
|
|
double result = Double.longBitsToDouble(valueLongBits);
|
|
|
|
if (Double.isNaN(result)) {
|
|
|
|
throw new RuntimeException("Did not expect to read NaN"); // (Because Excel typically doesn't write NaN
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
2008-10-23 15:08:42 -04:00
|
|
|
public void readFully(byte[] buf) {
|
|
|
|
readFully(buf, 0, buf.length);
|
|
|
|
}
|
|
|
|
|
|
|
|
public void readFully(byte[] buf, int off, int len) {
|
|
|
|
checkRecordPosition(len);
|
2008-10-24 19:13:44 -04:00
|
|
|
_le.readFully(buf, off, len);
|
2008-10-24 21:02:37 -04:00
|
|
|
_currentDataOffset+=len;
|
2008-10-23 15:08:42 -04:00
|
|
|
}
|
2008-10-05 00:43:48 -04:00
|
|
|
|
2008-10-10 21:11:05 -04:00
|
|
|
public String readString() {
|
|
|
|
int requestedLength = readUShort();
|
|
|
|
byte compressFlag = readByte();
|
|
|
|
return readStringCommon(requestedLength, compressFlag == 0);
|
|
|
|
}
|
|
|
|
/**
|
|
|
|
* given a byte array of 16-bit unicode characters, compress to 8-bit and
|
|
|
|
* return a string
|
|
|
|
*
|
|
|
|
* { 0x16, 0x00 } -0x16
|
|
|
|
*
|
|
|
|
* @param requestedLength the length of the final string
|
|
|
|
* @return the converted string
|
|
|
|
* @exception IllegalArgumentException if len is too large (i.e.,
|
|
|
|
* there is not enough data in string to create a String of that
|
|
|
|
* length)
|
|
|
|
*/
|
|
|
|
public String readUnicodeLEString(int requestedLength) {
|
|
|
|
return readStringCommon(requestedLength, false);
|
|
|
|
}
|
2008-10-05 00:43:48 -04:00
|
|
|
|
2008-10-10 21:11:05 -04:00
|
|
|
public String readCompressedUnicode(int requestedLength) {
|
|
|
|
return readStringCommon(requestedLength, true);
|
|
|
|
}
|
2005-08-18 03:06:44 -04:00
|
|
|
|
2008-10-10 21:11:05 -04:00
|
|
|
private String readStringCommon(int requestedLength, boolean pIsCompressedEncoding) {
|
|
|
|
// Sanity check to detect garbage string lengths
|
|
|
|
if (requestedLength < 0 || requestedLength > 0x100000) { // 16 million chars?
|
|
|
|
throw new IllegalArgumentException("Bad requested string length (" + requestedLength + ")");
|
|
|
|
}
|
|
|
|
char[] buf = new char[requestedLength];
|
|
|
|
boolean isCompressedEncoding = pIsCompressedEncoding;
|
|
|
|
int curLen = 0;
|
|
|
|
while(true) {
|
|
|
|
int availableChars =isCompressedEncoding ? remaining() : remaining() / LittleEndian.SHORT_SIZE;
|
|
|
|
if (requestedLength - curLen <= availableChars) {
|
|
|
|
// enough space in current record, so just read it out
|
|
|
|
while(curLen < requestedLength) {
|
|
|
|
char ch;
|
|
|
|
if (isCompressedEncoding) {
|
|
|
|
ch = (char)readUByte();
|
|
|
|
} else {
|
|
|
|
ch = (char)readShort();
|
|
|
|
}
|
|
|
|
buf[curLen] = ch;
|
|
|
|
curLen++;
|
|
|
|
}
|
|
|
|
return new String(buf);
|
|
|
|
}
|
|
|
|
// else string has been spilled into next continue record
|
|
|
|
// so read what's left of the current record
|
|
|
|
while(availableChars > 0) {
|
|
|
|
char ch;
|
|
|
|
if (isCompressedEncoding) {
|
|
|
|
ch = (char)readUByte();
|
|
|
|
} else {
|
|
|
|
ch = (char)readShort();
|
|
|
|
}
|
|
|
|
buf[curLen] = ch;
|
|
|
|
curLen++;
|
|
|
|
availableChars--;
|
|
|
|
}
|
|
|
|
if (!isContinueNext()) {
|
|
|
|
throw new RecordFormatException("Expected to find a ContinueRecord in order to read remaining "
|
|
|
|
+ (requestedLength-curLen) + " of " + requestedLength + " chars");
|
|
|
|
}
|
|
|
|
if(remaining() != 0) {
|
|
|
|
throw new RecordFormatException("Odd number of bytes(" + remaining() + ") left behind");
|
|
|
|
}
|
|
|
|
nextRecord();
|
|
|
|
// note - the compressed flag may change on the fly
|
|
|
|
byte compressFlag = readByte();
|
|
|
|
isCompressedEncoding = (compressFlag == 0);
|
|
|
|
}
|
|
|
|
}
|
2008-10-05 00:43:48 -04:00
|
|
|
|
2006-08-26 04:23:22 -04:00
|
|
|
/** Returns an excel style unicode string from the bytes reminaing in the record.
|
|
|
|
* <i>Note:</i> Unicode strings differ from <b>normal</b> strings due to the addition of
|
|
|
|
* formatting information.
|
2008-10-05 00:43:48 -04:00
|
|
|
*
|
2006-08-26 04:23:22 -04:00
|
|
|
* @return The unicode string representation of the remaining bytes.
|
|
|
|
*/
|
2005-08-18 03:06:44 -04:00
|
|
|
public UnicodeString readUnicodeString() {
|
|
|
|
return new UnicodeString(this);
|
|
|
|
}
|
2008-10-05 00:43:48 -04:00
|
|
|
|
2008-10-24 19:13:44 -04:00
|
|
|
/** Returns the remaining bytes for the current record.
|
|
|
|
*
|
|
|
|
* @return The remaining bytes of the current record.
|
|
|
|
*/
|
|
|
|
public byte[] readRemainder() {
|
|
|
|
int size = remaining();
|
|
|
|
if (size ==0) {
|
|
|
|
return EMPTY_BYTE_ARRAY;
|
|
|
|
}
|
|
|
|
byte[] result = new byte[size];
|
|
|
|
readFully(result);
|
|
|
|
return result;
|
|
|
|
}
|
2008-10-05 00:43:48 -04:00
|
|
|
|
2005-08-18 03:06:44 -04:00
|
|
|
/** Reads all byte data for the current record, including any
|
|
|
|
* that overlaps into any following continue records.
|
2008-10-05 00:43:48 -04:00
|
|
|
*
|
2005-08-18 03:06:44 -04:00
|
|
|
* @deprecated Best to write a input stream that wraps this one where there is
|
|
|
|
* special sub record that may overlap continue records.
|
2008-10-05 00:43:48 -04:00
|
|
|
*/
|
2005-08-18 03:06:44 -04:00
|
|
|
public byte[] readAllContinuedRemainder() {
|
|
|
|
//Using a ByteArrayOutputStream is just an easy way to get a
|
|
|
|
//growable array of the data.
|
|
|
|
ByteArrayOutputStream out = new ByteArrayOutputStream(2*MAX_RECORD_DATA_SIZE);
|
|
|
|
|
|
|
|
while (isContinueNext()) {
|
2008-10-05 00:43:48 -04:00
|
|
|
byte[] b = readRemainder();
|
2005-08-18 03:06:44 -04:00
|
|
|
out.write(b, 0, b.length);
|
|
|
|
nextRecord();
|
|
|
|
}
|
2008-10-05 00:43:48 -04:00
|
|
|
byte[] b = readRemainder();
|
|
|
|
out.write(b, 0, b.length);
|
|
|
|
|
2005-08-18 03:06:44 -04:00
|
|
|
return out.toByteArray();
|
|
|
|
}
|
|
|
|
|
2008-10-24 19:13:44 -04:00
|
|
|
/** The remaining number of bytes in the <i>current</i> record.
|
|
|
|
*
|
|
|
|
* @return The number of bytes remaining in the current record
|
|
|
|
*/
|
|
|
|
public int remaining() {
|
|
|
|
if (_currentDataLength == DATA_LEN_NEEDS_TO_BE_READ) {
|
|
|
|
// already read sid of next record. so current one is finished
|
|
|
|
return 0;
|
|
|
|
}
|
2008-10-24 21:02:37 -04:00
|
|
|
return _currentDataLength - _currentDataOffset;
|
2008-10-24 19:13:44 -04:00
|
|
|
}
|
2005-08-18 03:06:44 -04:00
|
|
|
|
2008-10-24 19:13:44 -04:00
|
|
|
/**
|
|
|
|
*
|
|
|
|
* @return <code>true</code> when a {@link ContinueRecord} is next.
|
|
|
|
*/
|
2008-10-24 21:02:37 -04:00
|
|
|
private boolean isContinueNext() {
|
|
|
|
if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ && _currentDataOffset != _currentDataLength) {
|
2008-10-24 19:13:44 -04:00
|
|
|
throw new IllegalStateException("Should never be called before end of current record");
|
|
|
|
}
|
|
|
|
if (!hasNextRecord()) {
|
|
|
|
return false;
|
|
|
|
}
|
2008-10-24 21:02:37 -04:00
|
|
|
// At what point are records continued?
|
|
|
|
// - Often from within the char data of long strings (caller is within readStringCommon()).
|
|
|
|
// - From UnicodeString construction (many different points - call via checkRecordPosition)
|
|
|
|
// - During TextObjectRecord construction (just before the text, perhaps within the text,
|
|
|
|
// and before the formatting run data)
|
|
|
|
return _nextSid == ContinueRecord.sid;
|
2008-10-24 19:13:44 -04:00
|
|
|
}
|
2005-08-18 03:06:44 -04:00
|
|
|
}
|