Fix for bug 46199 - tweaks to EmbeddedObjectRefSubRecord

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@713852 13f79535-47bb-0310-9956-ffa450edef68
2008-11-13 23:01:46 +00:00 · 2008-11-13 23:01:46 +00:00 · 41ad7b2410
commit 41ad7b2410
parent 9873ea6391
5 changed files with 125 additions and 93 deletions
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@ -37,6 +37,7 @@

 		<!-- Don't forget to update status.xml too! -->
        <release version="3.5-beta4" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">46199 - More tweaks to EmbeddedObjectRefSubRecord</header>
           <action dev="POI-DEVELOPERS" type="add">Changes to formula evaluation allowing for reduced memory usage</action>
           <action dev="POI-DEVELOPERS" type="fix">45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list</header>
           <action dev="POI-DEVELOPERS" type="fix">46184 - More odd escaped date formats</action>
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@ -34,6 +34,7 @@
 	<!-- Don't forget to update changes.xml too! -->
    <changes>
        <release version="3.5-beta4" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">46199 - More tweaks to EmbeddedObjectRefSubRecord</header>
           <action dev="POI-DEVELOPERS" type="add">Changes to formula evaluation allowing for reduced memory usage</action>
           <action dev="POI-DEVELOPERS" type="fix">45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list</header>
           <action dev="POI-DEVELOPERS" type="fix">46184 - More odd escaped date formats</action>
--- a/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java
+++ b/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java
@ -46,9 +46,9 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
 	private int field_1_unknown_int;
 	/** either an area or a cell ref */
 	private Ptg field_2_refPtg;
+	/** for when the 'formula' doesn't parse properly */
 	private byte[] field_2_unknownFormulaData;
-	// TODO: Consider making a utility class for these.  I've discovered the same field ordering
-	//	   in FormatRecord and StringRecord, it may be elsewhere too.
+	/** note- this byte is not present in the encoding if the string length is zero */ 
 	private boolean field_3_unicode_flag;  // Flags whether the string is Unicode.
 	private String  field_4_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8)
 	/** Formulas often have a single non-zero trailing byte.
@ -72,7 +72,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
 	}

 	public EmbeddedObjectRefSubRecord(LittleEndianInput in, int size) {
-		// TODO use 'size' param 
+
 		// Much guess-work going on here due to lack of any documentation.
 		// See similar source code in OOO:
 		// http://lxr.go-oo.org/source/sc/sc/source/filter/excel/xiescher.cxx
@ -101,26 +101,25 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
 		int stringByteCount;
 		if (remaining >= dataLenAfterFormula + 3) {
 			int tag = in.readByte();
-			remaining -= LittleEndian.BYTE_SIZE;
+			stringByteCount = LittleEndian.BYTE_SIZE;
 			if (tag != 0x03) {
 				throw new RecordFormatException("Expected byte 0x03 here");
 			}
 			int nChars = in.readUShort();
-			remaining -= LittleEndian.SHORT_SIZE;
+			stringByteCount += LittleEndian.SHORT_SIZE;
 			if (nChars > 0) {
 				 // OOO: the 4th way Xcl stores a unicode string: not even a Grbit byte present if length 0
 				field_3_unicode_flag = ( in.readByte() & 0x01 ) != 0;
-				remaining -= LittleEndian.BYTE_SIZE;
+				stringByteCount += LittleEndian.BYTE_SIZE;
 				if (field_3_unicode_flag) {
 					field_4_ole_classname = StringUtil.readUnicodeLE(in, nChars);
-					stringByteCount = nChars * 2;
+					stringByteCount += nChars * 2;
 				} else {
 					field_4_ole_classname = StringUtil.readCompressedUnicode(in, nChars);
-					stringByteCount = nChars;
+					stringByteCount += nChars;
 				}
 			} else {
 				field_4_ole_classname = "";
-				stringByteCount = 0;
 			}
 		} else {
 			field_4_ole_classname = null;
@ -150,10 +149,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
 		} else {
 			field_5_stream_id = null;
 		}
-
-		byte [] buf = new byte[remaining];
-		in.readFully(buf);
-		field_6_unknown = buf;
+		field_6_unknown = readRawData(in, remaining);
 	}

 	private static Ptg readRefPtg(byte[] formulaRawBytes) {
@ -176,9 +172,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
 			return EMPTY_BYTE_ARRAY;
 		}
 		byte[] result = new byte[size];
-		for(int i=0; i< size; i++) {
-			result[i] = in.readByte();
-		}
+		in.readFully(result);
 		return result;
 	}
 	
@ -191,12 +185,15 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
 			// don't write 0x03, stringLen, flag, text
 			stringLen = 0;
 		} else {
-			result += 1 + 2 + 1;  // 0x03, stringLen, flag
+			result += 1 + 2;  // 0x03, stringLen
 			stringLen = field_4_ole_classname.length();
-			if (field_3_unicode_flag) {
-				result += stringLen * 2;
-			} else {
-				result += stringLen;
+			if (stringLen > 0) {
+				result += 1; // flag
+				if (field_3_unicode_flag) {
+					result += stringLen * 2;
+				} else {
+					result += stringLen;
+				}
 			}
 		}
 		// pad to next 2 byte boundary
@ -253,15 +250,17 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
 			stringLen = field_4_ole_classname.length();
 			out.writeShort(stringLen);
 			pos+=2;
-			out.writeByte(field_3_unicode_flag ? 0x01 : 0x00);
-			pos+=1;
-
-			if (field_3_unicode_flag) {
-				StringUtil.putUnicodeLE(field_4_ole_classname, out);
-				pos += stringLen * 2;
-			} else {
-				StringUtil.putCompressedUnicode(field_4_ole_classname, out);
-				pos += stringLen;
+			if (stringLen > 0) {
+    			out.writeByte(field_3_unicode_flag ? 0x01 : 0x00);
+    			pos+=1;
+    
+    			if (field_3_unicode_flag) {
+    				StringUtil.putUnicodeLE(field_4_ole_classname, out);
+    				pos += stringLen * 2;
+    			} else {
+    				StringUtil.putCompressedUnicode(field_4_ole_classname, out);
+    				pos += stringLen;
+    			}
 			}
 		}

--- a/src/testcases/org/apache/poi/hssf/record/TestEmbeddedObjectRefSubRecord.java
+++ b/src/testcases/org/apache/poi/hssf/record/TestEmbeddedObjectRefSubRecord.java
@ -33,14 +33,18 @@ import org.apache.poi.util.HexRead;
 */
 public final class TestEmbeddedObjectRefSubRecord extends TestCase {

-	String data1 = "[20, 00, 05, 00, FC, 10, 76, 01, 02, 24, 14, DF, 00, 03, 10, 00, 00, 46, 6F, 72, 6D, 73, 2E, 43, 68, 65, 63, 6B, 42, 6F, 78, 2E, 31, 00, 00, 00, 00, 00, 70, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, ]";
+	private static final short EORSR_SID = EmbeddedObjectRefSubRecord.sid;

 	public void testStore() {
+		String data1
+				= "20 00 05 00 FC 10 76 01 02 24 14 DF 00 03 10 00 "
+				+ "00 46 6F 72 6D 73 2E 43 68 65 63 6B 42 6F 78 2E "
+				+ "31 00 00 00 00 00 70 00 00 00 00 00 00 00 00 00 "
+				+ "00 00";

 		byte[] src = hr(data1);
-//		src = TestcaseRecordInputStream.mergeDataAndSid(EmbeddedObjectRefSubRecord.sid, (short)src.length, src);

-		RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, src);
+		RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, src);

 		EmbeddedObjectRefSubRecord record1 = new EmbeddedObjectRefSubRecord(in, src.length);

@ -84,22 +88,19 @@ public final class TestEmbeddedObjectRefSubRecord extends TestCase {
 		assertTrue(Arrays.equals(ser, ser2));
 	}

-
-	/**
-	 * taken from ftPictFmla sub-record in attachment 22645 (offset 0x40AB).
-	 */
-	private static final byte[] data45912 = hr(
-		"12 00 0B 00 F8 02 88 04 3B 00 " +
-		"00 00 00 01 00 00 00 01 " +
-		"00 00");
-
 	public void testCameraTool_bug45912() {
-		byte[] data = data45912;
-		RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, data);
+		/**
+		 * taken from ftPictFmla sub-record in attachment 22645 (offset 0x40AB).
+		 */
+		byte[] data45912 = hr(
+				"12 00 0B 00 F8 02 88 04 3B 00 " +
+				"00 00 00 01 00 00 00 01 " +
+				"00 00");
+		RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data45912);

-		EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data.length);
+		EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data45912.length);
 		byte[] ser2 = rec.serialize();
-		confirmData(data, ser2);
+		confirmData(data45912, ser2);
 	}

 	private static byte[] hr(String string) {
@ -134,14 +135,37 @@ public final class TestEmbeddedObjectRefSubRecord extends TestCase {
 	}

 	private static void confirmRead(byte[] data, int i) {
-		RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, data);
+		RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data);

 		EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data.length);
 		byte[] ser2 = rec.serialize();
-		byte[] d2 = (byte[]) data.clone(); // remove sid+len for compare
-		System.arraycopy(ser2, 4, d2, 0, d2.length);
-		if (!Arrays.equals(data, d2)) {
-			fail("re-read NQR for case " + i);
+		TestcaseRecordInputStream.confirmRecordEncoding("Test record " + i, EORSR_SID, data, ser2);
+	}
+	
+	public void testVisioDrawing_bug46199() {
+		/**
+		 * taken from ftPictFmla sub-record in attachment 22860 (stream offset 0x768F).<br/>
+		 * Note that the since the string length is zero, there is no unicode flag byte
+		 */
+		byte[] data46199 = hr(
+				  "0E 00 "
+				+ "05 00 "
+				+ "28 25 A3 01 "
+				+ "02 6C D1 34 02 "
+				+ "03 00 00 "
+				+ "0F CB E8 00");
+		RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data46199);
+
+		EmbeddedObjectRefSubRecord rec;
+		try {
+			rec = new EmbeddedObjectRefSubRecord(in, data46199.length);
+		} catch (RecordFormatException e) {
+			if (e.getMessage().equals("Not enough data (3) to read requested (4) bytes")) {
+				throw new AssertionFailedError("Identified bug 22860");
+			}
+			throw e;
 		}
+		byte[] ser2 = rec.serialize();
+		TestcaseRecordInputStream.confirmRecordEncoding(EORSR_SID, data46199, ser2);
 	}
 }
--- a/src/testcases/org/apache/poi/hssf/record/TestcaseRecordInputStream.java
+++ b/src/testcases/org/apache/poi/hssf/record/TestcaseRecordInputStream.java
@ -21,7 +21,9 @@ import java.io.ByteArrayInputStream;
 import java.io.InputStream;

 import junit.framework.Assert;
+import junit.framework.AssertionFailedError;

+import org.apache.poi.util.HexDump;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.LittleEndianByteArrayInputStream;
 import org.apache.poi.util.LittleEndianInput;
@ -29,29 +31,29 @@ import org.apache.poi.util.LittleEndianInput;
 /**
 * A Record Input Stream derivative that makes access to byte arrays used in the
 * test cases work a bit easier.
- * <p> Creates the sream and moves to the first record.
+ * <p> Creates the stream and moves to the first record.
 *
 * @author Jason Height (jheight at apache.org)
 */
 public final class TestcaseRecordInputStream {
-	
+
 	private TestcaseRecordInputStream() {
 		// no instances of this class
 	}
-	
+
 	/**
-	 * Prepends a mock record identifier to the supplied data and opens a record input stream 
+	 * Prepends a mock record identifier to the supplied data and opens a record input stream
 	 */
 	public static LittleEndianInput createLittleEndian(byte[] data) {
 		return new LittleEndianByteArrayInputStream(data);
-		
+
 	}
 	public static RecordInputStream create(int sid, byte[] data) {
 		return create(mergeDataAndSid(sid, data.length, data));
 	}
 	/**
-	 * First 4 bytes of <tt>data</tt> are assumed to be record identifier and length. The supplied 
-	 * <tt>data</tt> can contain multiple records (sequentially encoded in the same way) 
+	 * First 4 bytes of <tt>data</tt> are assumed to be record identifier and length. The supplied
+	 * <tt>data</tt> can contain multiple records (sequentially encoded in the same way)
 	 */
 	public static RecordInputStream create(byte[] data) {
 		InputStream is = new ByteArrayInputStream(data);
@ -59,40 +61,45 @@ public final class TestcaseRecordInputStream {
 		result.nextRecord();
 		return result;
 	}
-	
-    /**
-     * Convenience constructor
-     */
-//    public TestcaseRecordInputStream(int sid, byte[] data)
-//    {
-//      super(new ByteArrayInputStream(mergeDataAndSid(sid, data.length, data)));
-//      nextRecord();
-//    }
-//    public TestcaseRecordInputStream(short sid, short length, byte[] data)
-//    {
-//      super(new ByteArrayInputStream(mergeDataAndSid(sid, length, data)));
-//      nextRecord();
-//    }

-    public static byte[] mergeDataAndSid(int sid, int length, byte[] data) {
-      byte[] result = new byte[data.length + 4];
-      LittleEndian.putUShort(result, 0, sid);
-      LittleEndian.putUShort(result, 2, length);
-      System.arraycopy(data, 0, result, 4, data.length);
-      return result;
-    }
-    /**
-     * Confirms data sections are equal
-     * @param expectedData - just raw data (without sid or size short ints)
-     * @param actualRecordBytes this includes 4 prefix bytes (sid & size)
-     */
-    public static void confirmRecordEncoding(int expectedSid, byte[] expectedData, byte[] actualRecordBytes) {
-        int expectedDataSize = expectedData.length;
-        Assert.assertEquals(actualRecordBytes.length - 4, expectedDataSize);
-        Assert.assertEquals(expectedSid, LittleEndian.getShort(actualRecordBytes, 0));
-        Assert.assertEquals(expectedDataSize, LittleEndian.getShort(actualRecordBytes, 2));
-        for (int i = 0; i < expectedDataSize; i++)
-            Assert.assertEquals("At offset " + i, expectedData[i], actualRecordBytes[i+4]);
-        
-    }
+	public static byte[] mergeDataAndSid(int sid, int length, byte[] data) {
+	  byte[] result = new byte[data.length + 4];
+	  LittleEndian.putUShort(result, 0, sid);
+	  LittleEndian.putUShort(result, 2, length);
+	  System.arraycopy(data, 0, result, 4, data.length);
+	  return result;
+	}
+	/**
+	 * Confirms data sections are equal
+	 * @param expectedData - just raw data (without sid or size short ints)
+	 * @param actualRecordBytes this includes 4 prefix bytes (sid & size)
+	 */
+	public static void confirmRecordEncoding(int expectedSid, byte[] expectedData, byte[] actualRecordBytes)
+			throws AssertionFailedError {
+		confirmRecordEncoding(null, expectedSid, expectedData, actualRecordBytes);
+	}
+	/**
+	 * Confirms data sections are equal
+	 * @param msgPrefix message prefix to be displayed in case of failure
+	 * @param expectedData - just raw data (without ushort sid, ushort size)
+	 * @param actualRecordBytes this includes 4 prefix bytes (sid & size)
+	 */
+	public static void confirmRecordEncoding(String msgPrefix, int expectedSid, byte[] expectedData, byte[] actualRecordBytes)
+			throws AssertionFailedError {
+		int expectedDataSize = expectedData.length;
+		Assert.assertEquals("Size of encode data mismatch", actualRecordBytes.length - 4, expectedDataSize);
+		Assert.assertEquals(expectedSid, LittleEndian.getShort(actualRecordBytes, 0));
+		Assert.assertEquals(expectedDataSize, LittleEndian.getShort(actualRecordBytes, 2));
+		for (int i = 0; i < expectedDataSize; i++)
+			if (expectedData[i] != actualRecordBytes[i+4]) {
+				StringBuilder sb = new StringBuilder(64);
+				if (msgPrefix != null) {
+					sb.append(msgPrefix).append(": ");
+				}
+				sb.append("At offset ").append(i);
+				sb.append(": expected ").append(HexDump.byteToHex(expectedData[i]));
+				sb.append(" but found ").append(HexDump.byteToHex(actualRecordBytes[i+4]));
+				throw new AssertionFailedError(sb.toString());
+			}
+	}
 }