Fix for bug 46545 - ObjRecord should ignore excessive padding written by previous POI versions

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@738709 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2009-01-29 01:57:22 +00:00
parent 0b692dd75c
commit 980ff2cf30
4 changed files with 106 additions and 26 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.5-beta5" date="2008-??-??"> <release version="3.5-beta5" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">46545 - Fixed ObjRecord to ignore excessive padding written by previous POI versions</action>
<action dev="POI-DEVELOPERS" type="fix">46613 - Fixed evaluator to perform case insensitive string comparisons</action> <action dev="POI-DEVELOPERS" type="fix">46613 - Fixed evaluator to perform case insensitive string comparisons</action>
<action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action> <action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action> <action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.5-beta5" date="2008-??-??"> <release version="3.5-beta5" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">46545 - Fixed ObjRecord to ignore excessive padding written by previous POI versions</action>
<action dev="POI-DEVELOPERS" type="fix">46613 - Fixed evaluator to perform case insensitive string comparisons</action> <action dev="POI-DEVELOPERS" type="fix">46613 - Fixed evaluator to perform case insensitive string comparisons</action>
<action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action> <action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action> <action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action>

View File

@ -39,9 +39,9 @@ public final class ObjRecord extends Record {
private static final int NORMAL_PAD_ALIGNMENT = 2; private static final int NORMAL_PAD_ALIGNMENT = 2;
private static int MAX_PAD_ALIGNMENT = 4; private static int MAX_PAD_ALIGNMENT = 4;
private List subrecords; private List<SubRecord> subrecords;
/** used when POI has no idea what is going on */ /** used when POI has no idea what is going on */
private byte[] _uninterpretedData; private final byte[] _uninterpretedData;
/** /**
* Excel seems to tolerate padding to quad or double byte length * Excel seems to tolerate padding to quad or double byte length
*/ */
@ -52,13 +52,14 @@ public final class ObjRecord extends Record {
public ObjRecord() { public ObjRecord() {
subrecords = new ArrayList(2); subrecords = new ArrayList<SubRecord>(2);
// TODO - ensure 2 sub-records (ftCmo 15h, and ftEnd 00h) are always created // TODO - ensure 2 sub-records (ftCmo 15h, and ftEnd 00h) are always created
_uninterpretedData = null;
} }
public ObjRecord(RecordInputStream in) { public ObjRecord(RecordInputStream in) {
// TODO - problems with OBJ sub-records stream // TODO - problems with OBJ sub-records stream
// MS spec says first sub-records is always CommonObjectDataSubRecord, // MS spec says first sub-record is always CommonObjectDataSubRecord,
// and last is // and last is
// always EndSubRecord. OOO spec does not mention ObjRecord(0x005D). // always EndSubRecord. OOO spec does not mention ObjRecord(0x005D).
// Existing POI test data seems to violate that rule. Some test data // Existing POI test data seems to violate that rule. Some test data
@ -74,6 +75,7 @@ public final class ObjRecord extends Record {
// Excel tolerates the funny ObjRecord, and replaces it with a corrected version // Excel tolerates the funny ObjRecord, and replaces it with a corrected version
// The exact logic/reasoning is not yet understood // The exact logic/reasoning is not yet understood
_uninterpretedData = subRecordData; _uninterpretedData = subRecordData;
subrecords = null;
return; return;
} }
if (subRecordData.length % 2 != 0) { if (subRecordData.length % 2 != 0) {
@ -83,7 +85,7 @@ public final class ObjRecord extends Record {
// System.out.println(HexDump.toHex(subRecordData)); // System.out.println(HexDump.toHex(subRecordData));
subrecords = new ArrayList(); subrecords = new ArrayList<SubRecord>();
ByteArrayInputStream bais = new ByteArrayInputStream(subRecordData); ByteArrayInputStream bais = new ByteArrayInputStream(subRecordData);
LittleEndianInputStream subRecStream = new LittleEndianInputStream(bais); LittleEndianInputStream subRecStream = new LittleEndianInputStream(bais);
while (true) { while (true) {
@ -98,13 +100,40 @@ public final class ObjRecord extends Record {
// At present (Oct-2008), most unit test samples have (subRecordData.length % 2 == 0) // At present (Oct-2008), most unit test samples have (subRecordData.length % 2 == 0)
_isPaddedToQuadByteMultiple = subRecordData.length % MAX_PAD_ALIGNMENT == 0; _isPaddedToQuadByteMultiple = subRecordData.length % MAX_PAD_ALIGNMENT == 0;
if (nRemainingBytes >= (_isPaddedToQuadByteMultiple ? MAX_PAD_ALIGNMENT : NORMAL_PAD_ALIGNMENT)) { if (nRemainingBytes >= (_isPaddedToQuadByteMultiple ? MAX_PAD_ALIGNMENT : NORMAL_PAD_ALIGNMENT)) {
String msg = "Leftover " + nRemainingBytes if (!canPaddingBeDiscarded(subRecordData, nRemainingBytes)) {
+ " bytes in subrecord data " + HexDump.toHex(subRecordData); String msg = "Leftover " + nRemainingBytes
throw new RecordFormatException(msg); + " bytes in subrecord data " + HexDump.toHex(subRecordData);
throw new RecordFormatException(msg);
}
_isPaddedToQuadByteMultiple = false;
} }
} else { } else {
_isPaddedToQuadByteMultiple = false; _isPaddedToQuadByteMultiple = false;
} }
_uninterpretedData = null;
}
/**
* Some XLS files have ObjRecords with nearly 8Kb of excessive padding. These were probably
* written by a version of POI (around 3.1) which incorrectly interpreted the second short of
* the ftLbs subrecord (0x1FEE) as a length, and read that many bytes as padding (other bugs
* helped allow this to occur).
*
* Excel reads files with this excessive padding OK, truncating the over-sized ObjRecord back
* to the its proper size. POI does the same.
*/
private static boolean canPaddingBeDiscarded(byte[] data, int nRemainingBytes) {
if (data.length < 0x1FEE) {
// this looks like a different problem
return false;
}
// make sure none of the padding looks important
for(int i=data.length-nRemainingBytes; i<data.length; i++) {
if (data[i] != 0x00) {
return false;
}
}
return true;
} }
public String toString() { public String toString() {
@ -112,7 +141,7 @@ public final class ObjRecord extends Record {
sb.append("[OBJ]\n"); sb.append("[OBJ]\n");
for (int i = 0; i < subrecords.size(); i++) { for (int i = 0; i < subrecords.size(); i++) {
SubRecord record = (SubRecord) subrecords.get(i); SubRecord record = subrecords.get(i);
sb.append("SUBRECORD: ").append(record.toString()); sb.append("SUBRECORD: ").append(record.toString());
} }
sb.append("[/OBJ]\n"); sb.append("[/OBJ]\n");
@ -125,7 +154,7 @@ public final class ObjRecord extends Record {
} }
int size = 0; int size = 0;
for (int i=subrecords.size()-1; i>=0; i--) { for (int i=subrecords.size()-1; i>=0; i--) {
SubRecord record = (SubRecord) subrecords.get(i); SubRecord record = subrecords.get(i);
size += record.getDataSize()+4; size += record.getDataSize()+4;
} }
if (_isPaddedToQuadByteMultiple) { if (_isPaddedToQuadByteMultiple) {
@ -151,7 +180,7 @@ public final class ObjRecord extends Record {
if (_uninterpretedData == null) { if (_uninterpretedData == null) {
for (int i = 0; i < subrecords.size(); i++) { for (int i = 0; i < subrecords.size(); i++) {
SubRecord record = (SubRecord) subrecords.get(i); SubRecord record = subrecords.get(i);
record.serialize(out); record.serialize(out);
} }
int expectedEndIx = offset+dataSize; int expectedEndIx = offset+dataSize;
@ -169,7 +198,7 @@ public final class ObjRecord extends Record {
return sid; return sid;
} }
public List getSubRecords() { public List<SubRecord> getSubRecords() {
return subrecords; return subrecords;
} }
@ -177,11 +206,11 @@ public final class ObjRecord extends Record {
subrecords.clear(); subrecords.clear();
} }
public void addSubRecord(int index, Object element) { public void addSubRecord(int index, SubRecord element) {
subrecords.add(index, element); subrecords.add(index, element);
} }
public boolean addSubRecord(Object o) { public boolean addSubRecord(SubRecord o) {
return subrecords.add(o); return subrecords.add(o);
} }
@ -189,8 +218,8 @@ public final class ObjRecord extends Record {
ObjRecord rec = new ObjRecord(); ObjRecord rec = new ObjRecord();
for (int i = 0; i < subrecords.size(); i++) { for (int i = 0; i < subrecords.size(); i++) {
SubRecord record = (SubRecord) subrecords.get(i); SubRecord record = subrecords.get(i);
rec.addSubRecord(record.clone()); rec.addSubRecord((SubRecord) record.clone());
} }
return rec; return rec;
} }

View File

@ -25,6 +25,7 @@ import junit.framework.AssertionFailedError;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.util.HexRead; import org.apache.poi.util.HexRead;
import org.apache.poi.util.LittleEndian;
/** /**
* Tests Subrecord components of an OBJ record. Test data taken directly * Tests Subrecord components of an OBJ record. Test data taken directly
@ -85,21 +86,21 @@ public final class TestSubRecord extends TestCase {
public void testReadManualComboWithFormula() { public void testReadManualComboWithFormula() {
byte[] data = HexRead.readFromString("" byte[] data = HexRead.readFromString(""
+ "5D 00 66 00 " + "5D 00 66 00 "
+ "15 00 12 00 14 00 02 00 11 20 00 00 00 00 " + "15 00 12 00 14 00 02 00 11 20 00 00 00 00 "
+ "20 44 C6 04 00 00 00 00 0C 00 14 00 04 F0 C6 04 " + "20 44 C6 04 00 00 00 00 0C 00 14 00 04 F0 C6 04 "
+ "00 00 00 00 00 00 01 00 06 00 00 00 10 00 00 00 " + "00 00 00 00 00 00 01 00 06 00 00 00 10 00 00 00 "
+ "0E 00 0C 00 05 00 80 44 C6 04 24 09 00 02 00 02 " + "0E 00 0C 00 05 00 80 44 C6 04 24 09 00 02 00 02 "
+ "13 00 DE 1F 10 00 09 00 80 44 C6 04 25 0A 00 0F " + "13 00 DE 1F 10 00 09 00 80 44 C6 04 25 0A 00 0F "
+ "00 02 00 02 00 02 06 00 03 00 08 00 00 00 00 00 " + "00 02 00 02 00 02 06 00 03 00 08 00 00 00 00 00 "
+ "08 00 00 00 00 00 00 00 " // TODO sometimes last byte is non-zero + "08 00 00 00 00 00 00 00 " // TODO sometimes last byte is non-zero
); );
RecordInputStream in = TestcaseRecordInputStream.create(data); RecordInputStream in = TestcaseRecordInputStream.create(data);
ObjRecord or = new ObjRecord(in); ObjRecord or = new ObjRecord(in);
byte[] data2 = or.serialize(); byte[] data2 = or.serialize();
if (data2.length == 8228) { if (data2.length == 8228) {
throw new AssertionFailedError("Identified bug XXXXX"); throw new AssertionFailedError("Identified bug 45778");
} }
assertEquals("Encoded length", data.length, data2.length); assertEquals("Encoded length", data.length, data2.length);
for (int i = 0; i < data.length; i++) { for (int i = 0; i < data.length; i++) {
@ -109,4 +110,52 @@ public final class TestSubRecord extends TestCase {
} }
assertTrue(Arrays.equals(data, data2)); assertTrue(Arrays.equals(data, data2));
} }
/**
* Some versions of POI (e.g. 3.1 - prior to svn r707450 / bug 45778) interpreted the ftLbs
* subrecord second short (0x1FEE) as a length, and hence read lots of extra padding. This
* buffer-overrun in {@link RecordInputStream} happened silently due to problems later fixed
* in svn 707778. When the ObjRecord is written, the extra padding is written too, making the
* record 8224 bytes long instead of 70.
* (An aside: It seems more than a coincidence that this problem creates a record of exactly
* {@link RecordInputStream#MAX_RECORD_DATA_SIZE} but not enough is understood about
* subrecords to explain this.)<br/>
*
* Excel reads files with this excessive padding OK. It also truncates the over-sized
* ObjRecord back to the proper size. POI should do the same.
*/
public void testWayTooMuchPadding_bug46545() {
byte[] data = HexRead.readFromString(""
+ "15 00 12 00 14 00 13 00 01 21 00 00 00"
+ "00 98 0B 5B 09 00 00 00 00 0C 00 14 00 00 00 00 00 00 00 00"
+ "00 00 00 01 00 01 00 00 00 10 00 00 00 "
// ftLbs
+ "13 00 EE 1F 00 00 "
+ "01 00 00 00 01 06 00 00 02 00 08 00 75 00 "
// ftEnd
+ "00 00 00 00"
);
final int LBS_START_POS = 0x002E;
final int WRONG_LBS_SIZE = 0x1FEE;
assertEquals(0x0013, LittleEndian.getShort(data, LBS_START_POS+0));
assertEquals(WRONG_LBS_SIZE, LittleEndian.getShort(data, LBS_START_POS+2));
int wrongTotalSize = LBS_START_POS + 4 + WRONG_LBS_SIZE;
byte[] wrongData = new byte[wrongTotalSize];
System.arraycopy(data, 0, wrongData, 0, data.length);
// wrongData has the ObjRecord data as would have been written by v3.1
RecordInputStream in = TestcaseRecordInputStream.create(ObjRecord.sid, wrongData);
ObjRecord or;
try {
or = new ObjRecord(in);
} catch (RecordFormatException e) {
if (e.getMessage().startsWith("Leftover 8154 bytes in subrecord data")) {
throw new AssertionFailedError("Identified bug 46545");
}
throw e;
}
// make sure POI properly truncates the ObjRecord data
byte[] data2 = or.serialize();
TestcaseRecordInputStream.confirmRecordEncoding(ObjRecord.sid, data, data2);
}
} }