From 980ff2cf303f7eb67c76631c7346ff1b2022c1c2 Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Thu, 29 Jan 2009 01:57:22 +0000 Subject: [PATCH] Fix for bug 46545 - ObjRecord should ignore excessive padding written by previous POI versions git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@738709 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../org/apache/poi/hssf/record/ObjRecord.java | 61 +++++++++++----- .../apache/poi/hssf/record/TestSubRecord.java | 69 ++++++++++++++++--- 4 files changed, 106 insertions(+), 26 deletions(-) diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 61605b277..8167226ca 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 46545 - Fixed ObjRecord to ignore excessive padding written by previous POI versions 46613 - Fixed evaluator to perform case insensitive string comparisons 46544 - command line interface for hssf ExcelExtractor 46547 - Allow addition of conditional formatting after data validation diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 595dfa169..131c08713 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 46545 - Fixed ObjRecord to ignore excessive padding written by previous POI versions 46613 - Fixed evaluator to perform case insensitive string comparisons 46544 - command line interface for hssf ExcelExtractor 46547 - Allow addition of conditional formatting after data validation diff --git a/src/java/org/apache/poi/hssf/record/ObjRecord.java b/src/java/org/apache/poi/hssf/record/ObjRecord.java index 6f4a651cc..32231d9ae 100644 --- a/src/java/org/apache/poi/hssf/record/ObjRecord.java +++ b/src/java/org/apache/poi/hssf/record/ObjRecord.java @@ -39,9 +39,9 @@ public final class ObjRecord extends Record { private static final int NORMAL_PAD_ALIGNMENT = 2; private static int MAX_PAD_ALIGNMENT = 4; - private List subrecords; + private List subrecords; /** used when POI has no idea what is going on */ - private byte[] _uninterpretedData; + private final byte[] _uninterpretedData; /** * Excel seems to tolerate padding to quad or double byte length */ @@ -52,13 +52,14 @@ public final class ObjRecord extends Record { public ObjRecord() { - subrecords = new ArrayList(2); + subrecords = new ArrayList(2); // TODO - ensure 2 sub-records (ftCmo 15h, and ftEnd 00h) are always created + _uninterpretedData = null; } public ObjRecord(RecordInputStream in) { // TODO - problems with OBJ sub-records stream - // MS spec says first sub-records is always CommonObjectDataSubRecord, + // MS spec says first sub-record is always CommonObjectDataSubRecord, // and last is // always EndSubRecord. OOO spec does not mention ObjRecord(0x005D). // Existing POI test data seems to violate that rule. Some test data @@ -74,6 +75,7 @@ public final class ObjRecord extends Record { // Excel tolerates the funny ObjRecord, and replaces it with a corrected version // The exact logic/reasoning is not yet understood _uninterpretedData = subRecordData; + subrecords = null; return; } if (subRecordData.length % 2 != 0) { @@ -83,7 +85,7 @@ public final class ObjRecord extends Record { // System.out.println(HexDump.toHex(subRecordData)); - subrecords = new ArrayList(); + subrecords = new ArrayList(); ByteArrayInputStream bais = new ByteArrayInputStream(subRecordData); LittleEndianInputStream subRecStream = new LittleEndianInputStream(bais); while (true) { @@ -98,13 +100,40 @@ public final class ObjRecord extends Record { // At present (Oct-2008), most unit test samples have (subRecordData.length % 2 == 0) _isPaddedToQuadByteMultiple = subRecordData.length % MAX_PAD_ALIGNMENT == 0; if (nRemainingBytes >= (_isPaddedToQuadByteMultiple ? MAX_PAD_ALIGNMENT : NORMAL_PAD_ALIGNMENT)) { - String msg = "Leftover " + nRemainingBytes - + " bytes in subrecord data " + HexDump.toHex(subRecordData); - throw new RecordFormatException(msg); + if (!canPaddingBeDiscarded(subRecordData, nRemainingBytes)) { + String msg = "Leftover " + nRemainingBytes + + " bytes in subrecord data " + HexDump.toHex(subRecordData); + throw new RecordFormatException(msg); + } + _isPaddedToQuadByteMultiple = false; } } else { _isPaddedToQuadByteMultiple = false; } + _uninterpretedData = null; + } + + /** + * Some XLS files have ObjRecords with nearly 8Kb of excessive padding. These were probably + * written by a version of POI (around 3.1) which incorrectly interpreted the second short of + * the ftLbs subrecord (0x1FEE) as a length, and read that many bytes as padding (other bugs + * helped allow this to occur). + * + * Excel reads files with this excessive padding OK, truncating the over-sized ObjRecord back + * to the its proper size. POI does the same. + */ + private static boolean canPaddingBeDiscarded(byte[] data, int nRemainingBytes) { + if (data.length < 0x1FEE) { + // this looks like a different problem + return false; + } + // make sure none of the padding looks important + for(int i=data.length-nRemainingBytes; i=0; i--) { - SubRecord record = (SubRecord) subrecords.get(i); + SubRecord record = subrecords.get(i); size += record.getDataSize()+4; } if (_isPaddedToQuadByteMultiple) { @@ -151,7 +180,7 @@ public final class ObjRecord extends Record { if (_uninterpretedData == null) { for (int i = 0; i < subrecords.size(); i++) { - SubRecord record = (SubRecord) subrecords.get(i); + SubRecord record = subrecords.get(i); record.serialize(out); } int expectedEndIx = offset+dataSize; @@ -169,7 +198,7 @@ public final class ObjRecord extends Record { return sid; } - public List getSubRecords() { + public List getSubRecords() { return subrecords; } @@ -177,11 +206,11 @@ public final class ObjRecord extends Record { subrecords.clear(); } - public void addSubRecord(int index, Object element) { + public void addSubRecord(int index, SubRecord element) { subrecords.add(index, element); } - public boolean addSubRecord(Object o) { + public boolean addSubRecord(SubRecord o) { return subrecords.add(o); } @@ -189,8 +218,8 @@ public final class ObjRecord extends Record { ObjRecord rec = new ObjRecord(); for (int i = 0; i < subrecords.size(); i++) { - SubRecord record = (SubRecord) subrecords.get(i); - rec.addSubRecord(record.clone()); + SubRecord record = subrecords.get(i); + rec.addSubRecord((SubRecord) record.clone()); } return rec; } diff --git a/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java b/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java index 9d0edd1c1..926f24397 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java +++ b/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java @@ -25,6 +25,7 @@ import junit.framework.AssertionFailedError; import junit.framework.TestCase; import org.apache.poi.util.HexRead; +import org.apache.poi.util.LittleEndian; /** * Tests Subrecord components of an OBJ record. Test data taken directly @@ -85,21 +86,21 @@ public final class TestSubRecord extends TestCase { public void testReadManualComboWithFormula() { byte[] data = HexRead.readFromString("" - + "5D 00 66 00 " - + "15 00 12 00 14 00 02 00 11 20 00 00 00 00 " - + "20 44 C6 04 00 00 00 00 0C 00 14 00 04 F0 C6 04 " - + "00 00 00 00 00 00 01 00 06 00 00 00 10 00 00 00 " - + "0E 00 0C 00 05 00 80 44 C6 04 24 09 00 02 00 02 " - + "13 00 DE 1F 10 00 09 00 80 44 C6 04 25 0A 00 0F " - + "00 02 00 02 00 02 06 00 03 00 08 00 00 00 00 00 " - + "08 00 00 00 00 00 00 00 " // TODO sometimes last byte is non-zero - ); + + "5D 00 66 00 " + + "15 00 12 00 14 00 02 00 11 20 00 00 00 00 " + + "20 44 C6 04 00 00 00 00 0C 00 14 00 04 F0 C6 04 " + + "00 00 00 00 00 00 01 00 06 00 00 00 10 00 00 00 " + + "0E 00 0C 00 05 00 80 44 C6 04 24 09 00 02 00 02 " + + "13 00 DE 1F 10 00 09 00 80 44 C6 04 25 0A 00 0F " + + "00 02 00 02 00 02 06 00 03 00 08 00 00 00 00 00 " + + "08 00 00 00 00 00 00 00 " // TODO sometimes last byte is non-zero + ); RecordInputStream in = TestcaseRecordInputStream.create(data); ObjRecord or = new ObjRecord(in); byte[] data2 = or.serialize(); if (data2.length == 8228) { - throw new AssertionFailedError("Identified bug XXXXX"); + throw new AssertionFailedError("Identified bug 45778"); } assertEquals("Encoded length", data.length, data2.length); for (int i = 0; i < data.length; i++) { @@ -109,4 +110,52 @@ public final class TestSubRecord extends TestCase { } assertTrue(Arrays.equals(data, data2)); } + + /** + * Some versions of POI (e.g. 3.1 - prior to svn r707450 / bug 45778) interpreted the ftLbs + * subrecord second short (0x1FEE) as a length, and hence read lots of extra padding. This + * buffer-overrun in {@link RecordInputStream} happened silently due to problems later fixed + * in svn 707778. When the ObjRecord is written, the extra padding is written too, making the + * record 8224 bytes long instead of 70. + * (An aside: It seems more than a coincidence that this problem creates a record of exactly + * {@link RecordInputStream#MAX_RECORD_DATA_SIZE} but not enough is understood about + * subrecords to explain this.)
+ * + * Excel reads files with this excessive padding OK. It also truncates the over-sized + * ObjRecord back to the proper size. POI should do the same. + */ + public void testWayTooMuchPadding_bug46545() { + byte[] data = HexRead.readFromString("" + + "15 00 12 00 14 00 13 00 01 21 00 00 00" + + "00 98 0B 5B 09 00 00 00 00 0C 00 14 00 00 00 00 00 00 00 00" + + "00 00 00 01 00 01 00 00 00 10 00 00 00 " + // ftLbs + + "13 00 EE 1F 00 00 " + + "01 00 00 00 01 06 00 00 02 00 08 00 75 00 " + // ftEnd + + "00 00 00 00" + ); + final int LBS_START_POS = 0x002E; + final int WRONG_LBS_SIZE = 0x1FEE; + assertEquals(0x0013, LittleEndian.getShort(data, LBS_START_POS+0)); + assertEquals(WRONG_LBS_SIZE, LittleEndian.getShort(data, LBS_START_POS+2)); + int wrongTotalSize = LBS_START_POS + 4 + WRONG_LBS_SIZE; + byte[] wrongData = new byte[wrongTotalSize]; + System.arraycopy(data, 0, wrongData, 0, data.length); + // wrongData has the ObjRecord data as would have been written by v3.1 + + RecordInputStream in = TestcaseRecordInputStream.create(ObjRecord.sid, wrongData); + ObjRecord or; + try { + or = new ObjRecord(in); + } catch (RecordFormatException e) { + if (e.getMessage().startsWith("Leftover 8154 bytes in subrecord data")) { + throw new AssertionFailedError("Identified bug 46545"); + } + throw e; + } + // make sure POI properly truncates the ObjRecord data + byte[] data2 = or.serialize(); + TestcaseRecordInputStream.confirmRecordEncoding(ObjRecord.sid, data, data2); + } }