Fix for bug 46545 - ObjRecord should ignore excessive padding written by previous POI versions

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@738709 13f79535-47bb-0310-9956-ffa450edef68
2009-01-29 01:57:22 +00:00 · 2009-01-29 01:57:22 +00:00 · 980ff2cf30
commit 980ff2cf30
parent 0b692dd75c
4 changed files with 106 additions and 26 deletions
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@ -37,6 +37,7 @@

 		<!-- Don't forget to update status.xml too! -->
        <release version="3.5-beta5" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">46545 - Fixed ObjRecord to ignore excessive padding written by previous POI versions</action>
           <action dev="POI-DEVELOPERS" type="fix">46613 - Fixed evaluator to perform case insensitive string comparisons</action>
           <action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action>
           <action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action>
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@ -34,6 +34,7 @@
 	<!-- Don't forget to update changes.xml too! -->
    <changes>
        <release version="3.5-beta5" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">46545 - Fixed ObjRecord to ignore excessive padding written by previous POI versions</action>
           <action dev="POI-DEVELOPERS" type="fix">46613 - Fixed evaluator to perform case insensitive string comparisons</action>
           <action dev="POI-DEVELOPERS" type="add">46544 - command line interface for hssf ExcelExtractor</action>
           <action dev="POI-DEVELOPERS" type="fix">46547 - Allow addition of conditional formatting after data validation</action>
--- a/src/java/org/apache/poi/hssf/record/ObjRecord.java
+++ b/src/java/org/apache/poi/hssf/record/ObjRecord.java
@ -39,9 +39,9 @@ public final class ObjRecord extends Record {
 	private static final int NORMAL_PAD_ALIGNMENT = 2;
 	private static int MAX_PAD_ALIGNMENT = 4;
 	
-	private List subrecords;
+	private List<SubRecord> subrecords;
 	/** used when POI has no idea what is going on */
-	private byte[] _uninterpretedData;
+	private final byte[] _uninterpretedData;
 	/**
 	 * Excel seems to tolerate padding to quad or double byte length
 	 */
@ -52,13 +52,14 @@ public final class ObjRecord extends Record {


 	public ObjRecord() {
-		subrecords = new ArrayList(2);
+		subrecords = new ArrayList<SubRecord>(2);
 		// TODO - ensure 2 sub-records (ftCmo 15h, and ftEnd 00h) are always created
+		_uninterpretedData = null;
 	}

 	public ObjRecord(RecordInputStream in) {
 		// TODO - problems with OBJ sub-records stream
-		// MS spec says first sub-records is always CommonObjectDataSubRecord,
+		// MS spec says first sub-record is always CommonObjectDataSubRecord,
 		// and last is
 		// always EndSubRecord. OOO spec does not mention ObjRecord(0x005D).
 		// Existing POI test data seems to violate that rule. Some test data
@ -74,6 +75,7 @@ public final class ObjRecord extends Record {
 			// Excel tolerates the funny ObjRecord, and replaces it with a corrected version
 			// The exact logic/reasoning is not yet understood
 			_uninterpretedData = subRecordData;
+			subrecords = null;
 			return;
 		}
 		if (subRecordData.length % 2 != 0) {
@ -83,7 +85,7 @@ public final class ObjRecord extends Record {

 //		System.out.println(HexDump.toHex(subRecordData));

-		subrecords = new ArrayList();
+		subrecords = new ArrayList<SubRecord>();
 		ByteArrayInputStream bais = new ByteArrayInputStream(subRecordData);
 		LittleEndianInputStream subRecStream = new LittleEndianInputStream(bais);
 		while (true) {
@ -98,13 +100,40 @@ public final class ObjRecord extends Record {
 			// At present (Oct-2008), most unit test samples have (subRecordData.length % 2 == 0)
 			_isPaddedToQuadByteMultiple = subRecordData.length % MAX_PAD_ALIGNMENT == 0;
 			if (nRemainingBytes >= (_isPaddedToQuadByteMultiple ? MAX_PAD_ALIGNMENT : NORMAL_PAD_ALIGNMENT)) {
-				String msg = "Leftover " + nRemainingBytes 
-				+ " bytes in subrecord data " + HexDump.toHex(subRecordData);
-				throw new RecordFormatException(msg);
+				if (!canPaddingBeDiscarded(subRecordData, nRemainingBytes)) {
+					String msg = "Leftover " + nRemainingBytes 
+						+ " bytes in subrecord data " + HexDump.toHex(subRecordData);
+					throw new RecordFormatException(msg);
+				}
+				_isPaddedToQuadByteMultiple = false;
 			}
 		} else {
 			_isPaddedToQuadByteMultiple = false;
 		}
+		_uninterpretedData = null;
+	}
+
+	/**
+	 * Some XLS files have ObjRecords with nearly 8Kb of excessive padding. These were probably
+	 * written by a version of POI (around 3.1) which incorrectly interpreted the second short of
+	 * the ftLbs subrecord (0x1FEE) as a length, and read that many bytes as padding (other bugs
+	 * helped allow this to occur).
+	 * 
+	 * Excel reads files with this excessive padding OK, truncating the over-sized ObjRecord back
+	 * to the its proper size.  POI does the same.
+	 */
+	private static boolean canPaddingBeDiscarded(byte[] data, int nRemainingBytes) {
+		if (data.length < 0x1FEE) {
+			// this looks like a different problem
+			return false;
+		}
+		// make sure none of the padding looks important
+		for(int i=data.length-nRemainingBytes; i<data.length; i++) {
+			if (data[i] != 0x00) {
+				return false;
+			}
+		}
+		return true;
 	}

 	public String toString() {
@ -112,7 +141,7 @@ public final class ObjRecord extends Record {

 		sb.append("[OBJ]\n");
 		for (int i = 0; i < subrecords.size(); i++) {
-			SubRecord record = (SubRecord) subrecords.get(i);
+			SubRecord record = subrecords.get(i);
 			sb.append("SUBRECORD: ").append(record.toString());
 		}
 		sb.append("[/OBJ]\n");
@ -125,7 +154,7 @@ public final class ObjRecord extends Record {
 		}
 		int size = 0;
 		for (int i=subrecords.size()-1; i>=0; i--) {
-			SubRecord record = (SubRecord) subrecords.get(i);
+			SubRecord record = subrecords.get(i);
 			size += record.getDataSize()+4;
 		}
 		if (_isPaddedToQuadByteMultiple) {
@ -151,7 +180,7 @@ public final class ObjRecord extends Record {
 		if (_uninterpretedData == null) {

 			for (int i = 0; i < subrecords.size(); i++) {
-				SubRecord record = (SubRecord) subrecords.get(i);
+				SubRecord record = subrecords.get(i);
 				record.serialize(out);
 			}
 			int expectedEndIx = offset+dataSize;
@ -169,7 +198,7 @@ public final class ObjRecord extends Record {
 		return sid;
 	}

-	public List getSubRecords() {
+	public List<SubRecord> getSubRecords() {
 		return subrecords;
 	}

@ -177,11 +206,11 @@ public final class ObjRecord extends Record {
 		subrecords.clear();
 	}

-	public void addSubRecord(int index, Object element) {
+	public void addSubRecord(int index, SubRecord element) {
 		subrecords.add(index, element);
 	}

-	public boolean addSubRecord(Object o) {
+	public boolean addSubRecord(SubRecord o) {
 		return subrecords.add(o);
 	}

@ -189,8 +218,8 @@ public final class ObjRecord extends Record {
 		ObjRecord rec = new ObjRecord();

 		for (int i = 0; i < subrecords.size(); i++) {
-			SubRecord record = (SubRecord) subrecords.get(i);
-			rec.addSubRecord(record.clone());
+			SubRecord record = subrecords.get(i);
+			rec.addSubRecord((SubRecord) record.clone());
 		}
 		return rec;
 	}
--- a/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java
+++ b/src/testcases/org/apache/poi/hssf/record/TestSubRecord.java
@ -25,6 +25,7 @@ import junit.framework.AssertionFailedError;
 import junit.framework.TestCase;

 import org.apache.poi.util.HexRead;
+import org.apache.poi.util.LittleEndian;

 /**
 * Tests Subrecord components of an OBJ record.  Test data taken directly
@ -85,21 +86,21 @@ public final class TestSubRecord extends TestCase {
 	
 	public void testReadManualComboWithFormula() {
 		byte[] data = HexRead.readFromString(""
-   			+ "5D 00 66 00 "
-   			+ "15 00 12 00 14 00 02 00 11 20 00 00 00 00 "
-   			+ "20 44 C6 04 00 00 00 00 0C 00 14 00 04 F0 C6 04 "
-   			+ "00 00 00 00 00 00 01 00 06 00 00 00 10 00 00 00 "
-   			+ "0E 00 0C 00 05 00 80 44 C6 04 24 09 00 02 00 02 "
-   			+ "13 00 DE 1F 10 00 09 00 80 44 C6 04 25 0A 00 0F "
-   			+ "00 02 00 02 00 02 06 00 03 00 08 00 00 00 00 00 "
-   			+ "08 00 00 00 00 00 00 00 " // TODO sometimes last byte is non-zero
-   		);
+			+ "5D 00 66 00 "
+			+ "15 00 12 00 14 00 02 00 11 20 00 00 00 00 "
+			+ "20 44 C6 04 00 00 00 00 0C 00 14 00 04 F0 C6 04 "
+			+ "00 00 00 00 00 00 01 00 06 00 00 00 10 00 00 00 "
+			+ "0E 00 0C 00 05 00 80 44 C6 04 24 09 00 02 00 02 "
+			+ "13 00 DE 1F 10 00 09 00 80 44 C6 04 25 0A 00 0F "
+			+ "00 02 00 02 00 02 06 00 03 00 08 00 00 00 00 00 "
+			+ "08 00 00 00 00 00 00 00 " // TODO sometimes last byte is non-zero
+		);
 		
 		RecordInputStream in = TestcaseRecordInputStream.create(data);
 		ObjRecord or = new ObjRecord(in);
 		byte[] data2 = or.serialize();
 		if (data2.length == 8228) {
-			throw new AssertionFailedError("Identified bug XXXXX");
+			throw new AssertionFailedError("Identified bug 45778");
 		}
 		assertEquals("Encoded length", data.length, data2.length);
 		for (int i = 0; i < data.length; i++) {
@ -109,4 +110,52 @@ public final class TestSubRecord extends TestCase {
 		}
 		assertTrue(Arrays.equals(data, data2));
 	}
+
+	/**
+	 * Some versions of POI (e.g. 3.1 - prior to svn r707450 / bug 45778) interpreted the ftLbs 
+	 * subrecord second short (0x1FEE) as a length, and hence read lots of extra padding.  This 
+	 * buffer-overrun in {@link RecordInputStream} happened silently due to problems later fixed
+	 * in svn 707778. When the ObjRecord is written, the extra padding is written too, making the 
+	 * record 8224 bytes long instead of 70.  
+	 * (An aside: It seems more than a coincidence that this problem creates a record of exactly
+	 * {@link RecordInputStream#MAX_RECORD_DATA_SIZE} but not enough is understood about 
+	 * subrecords to explain this.)<br/>
+	 * 
+	 * Excel reads files with this excessive padding OK.  It also truncates the over-sized
+	 * ObjRecord back to the proper size.  POI should do the same.
+	 */
+	public void testWayTooMuchPadding_bug46545() {
+		byte[] data = HexRead.readFromString(""
+			+ "15 00 12 00 14 00 13 00 01 21 00 00 00"
+			+ "00 98 0B 5B 09 00 00 00 00 0C 00 14 00 00 00 00 00 00 00 00"
+			+ "00 00 00 01 00 01 00 00 00 10 00 00 00 "
+			// ftLbs
+			+ "13 00 EE 1F 00 00 "
+			+ "01 00 00 00 01 06 00 00 02 00 08 00 75 00 "
+			// ftEnd
+			+ "00 00 00 00"
+		);
+		final int LBS_START_POS = 0x002E;
+		final int WRONG_LBS_SIZE = 0x1FEE;
+		assertEquals(0x0013, LittleEndian.getShort(data, LBS_START_POS+0));
+		assertEquals(WRONG_LBS_SIZE, LittleEndian.getShort(data, LBS_START_POS+2));
+		int wrongTotalSize = LBS_START_POS + 4 + WRONG_LBS_SIZE;
+		byte[] wrongData = new byte[wrongTotalSize];
+		System.arraycopy(data, 0, wrongData, 0, data.length);
+		// wrongData has the ObjRecord data as would have been written by v3.1 
+		
+		RecordInputStream in = TestcaseRecordInputStream.create(ObjRecord.sid, wrongData);
+		ObjRecord or;
+		try {
+			or = new ObjRecord(in);
+		} catch (RecordFormatException e) {
+			if (e.getMessage().startsWith("Leftover 8154 bytes in subrecord data")) {
+				throw new AssertionFailedError("Identified bug 46545");
+			}
+			throw e;
+		}
+		// make sure POI properly truncates the ObjRecord data
+		byte[] data2 = or.serialize();
+		TestcaseRecordInputStream.confirmRecordEncoding(ObjRecord.sid, data, data2);
+	}
 }