From 41ad7b24101e900fc3d9c1ecc4b9ced81f7fca79 Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Thu, 13 Nov 2008 23:01:46 +0000 Subject: [PATCH] Fix for bug 46199 - tweaks to EmbeddedObjectRefSubRecord git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@713852 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../record/EmbeddedObjectRefSubRecord.java | 59 ++++++------ .../TestEmbeddedObjectRefSubRecord.java | 66 +++++++++----- .../record/TestcaseRecordInputStream.java | 91 ++++++++++--------- 5 files changed, 125 insertions(+), 93 deletions(-) diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index c1740afad..5fde7e840 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 46199 - More tweaks to EmbeddedObjectRefSubRecord Changes to formula evaluation allowing for reduced memory usage 45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list 46184 - More odd escaped date formats diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 57fec14df..c2c90630d 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 46199 - More tweaks to EmbeddedObjectRefSubRecord Changes to formula evaluation allowing for reduced memory usage 45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list 46184 - More odd escaped date formats diff --git a/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java b/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java index f033149ee..8b1967c47 100644 --- a/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java +++ b/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java @@ -46,9 +46,9 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord { private int field_1_unknown_int; /** either an area or a cell ref */ private Ptg field_2_refPtg; + /** for when the 'formula' doesn't parse properly */ private byte[] field_2_unknownFormulaData; - // TODO: Consider making a utility class for these. I've discovered the same field ordering - // in FormatRecord and StringRecord, it may be elsewhere too. + /** note- this byte is not present in the encoding if the string length is zero */ private boolean field_3_unicode_flag; // Flags whether the string is Unicode. private String field_4_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8) /** Formulas often have a single non-zero trailing byte. @@ -72,7 +72,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord { } public EmbeddedObjectRefSubRecord(LittleEndianInput in, int size) { - // TODO use 'size' param + // Much guess-work going on here due to lack of any documentation. // See similar source code in OOO: // http://lxr.go-oo.org/source/sc/sc/source/filter/excel/xiescher.cxx @@ -101,26 +101,25 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord { int stringByteCount; if (remaining >= dataLenAfterFormula + 3) { int tag = in.readByte(); - remaining -= LittleEndian.BYTE_SIZE; + stringByteCount = LittleEndian.BYTE_SIZE; if (tag != 0x03) { throw new RecordFormatException("Expected byte 0x03 here"); } int nChars = in.readUShort(); - remaining -= LittleEndian.SHORT_SIZE; + stringByteCount += LittleEndian.SHORT_SIZE; if (nChars > 0) { // OOO: the 4th way Xcl stores a unicode string: not even a Grbit byte present if length 0 field_3_unicode_flag = ( in.readByte() & 0x01 ) != 0; - remaining -= LittleEndian.BYTE_SIZE; + stringByteCount += LittleEndian.BYTE_SIZE; if (field_3_unicode_flag) { field_4_ole_classname = StringUtil.readUnicodeLE(in, nChars); - stringByteCount = nChars * 2; + stringByteCount += nChars * 2; } else { field_4_ole_classname = StringUtil.readCompressedUnicode(in, nChars); - stringByteCount = nChars; + stringByteCount += nChars; } } else { field_4_ole_classname = ""; - stringByteCount = 0; } } else { field_4_ole_classname = null; @@ -150,10 +149,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord { } else { field_5_stream_id = null; } - - byte [] buf = new byte[remaining]; - in.readFully(buf); - field_6_unknown = buf; + field_6_unknown = readRawData(in, remaining); } private static Ptg readRefPtg(byte[] formulaRawBytes) { @@ -176,9 +172,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord { return EMPTY_BYTE_ARRAY; } byte[] result = new byte[size]; - for(int i=0; i< size; i++) { - result[i] = in.readByte(); - } + in.readFully(result); return result; } @@ -191,12 +185,15 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord { // don't write 0x03, stringLen, flag, text stringLen = 0; } else { - result += 1 + 2 + 1; // 0x03, stringLen, flag + result += 1 + 2; // 0x03, stringLen stringLen = field_4_ole_classname.length(); - if (field_3_unicode_flag) { - result += stringLen * 2; - } else { - result += stringLen; + if (stringLen > 0) { + result += 1; // flag + if (field_3_unicode_flag) { + result += stringLen * 2; + } else { + result += stringLen; + } } } // pad to next 2 byte boundary @@ -253,15 +250,17 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord { stringLen = field_4_ole_classname.length(); out.writeShort(stringLen); pos+=2; - out.writeByte(field_3_unicode_flag ? 0x01 : 0x00); - pos+=1; - - if (field_3_unicode_flag) { - StringUtil.putUnicodeLE(field_4_ole_classname, out); - pos += stringLen * 2; - } else { - StringUtil.putCompressedUnicode(field_4_ole_classname, out); - pos += stringLen; + if (stringLen > 0) { + out.writeByte(field_3_unicode_flag ? 0x01 : 0x00); + pos+=1; + + if (field_3_unicode_flag) { + StringUtil.putUnicodeLE(field_4_ole_classname, out); + pos += stringLen * 2; + } else { + StringUtil.putCompressedUnicode(field_4_ole_classname, out); + pos += stringLen; + } } } diff --git a/src/testcases/org/apache/poi/hssf/record/TestEmbeddedObjectRefSubRecord.java b/src/testcases/org/apache/poi/hssf/record/TestEmbeddedObjectRefSubRecord.java index 02875117e..69eefaf48 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestEmbeddedObjectRefSubRecord.java +++ b/src/testcases/org/apache/poi/hssf/record/TestEmbeddedObjectRefSubRecord.java @@ -33,14 +33,18 @@ import org.apache.poi.util.HexRead; */ public final class TestEmbeddedObjectRefSubRecord extends TestCase { - String data1 = "[20, 00, 05, 00, FC, 10, 76, 01, 02, 24, 14, DF, 00, 03, 10, 00, 00, 46, 6F, 72, 6D, 73, 2E, 43, 68, 65, 63, 6B, 42, 6F, 78, 2E, 31, 00, 00, 00, 00, 00, 70, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, ]"; + private static final short EORSR_SID = EmbeddedObjectRefSubRecord.sid; public void testStore() { + String data1 + = "20 00 05 00 FC 10 76 01 02 24 14 DF 00 03 10 00 " + + "00 46 6F 72 6D 73 2E 43 68 65 63 6B 42 6F 78 2E " + + "31 00 00 00 00 00 70 00 00 00 00 00 00 00 00 00 " + + "00 00"; byte[] src = hr(data1); -// src = TestcaseRecordInputStream.mergeDataAndSid(EmbeddedObjectRefSubRecord.sid, (short)src.length, src); - RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, src); + RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, src); EmbeddedObjectRefSubRecord record1 = new EmbeddedObjectRefSubRecord(in, src.length); @@ -84,22 +88,19 @@ public final class TestEmbeddedObjectRefSubRecord extends TestCase { assertTrue(Arrays.equals(ser, ser2)); } - - /** - * taken from ftPictFmla sub-record in attachment 22645 (offset 0x40AB). - */ - private static final byte[] data45912 = hr( - "12 00 0B 00 F8 02 88 04 3B 00 " + - "00 00 00 01 00 00 00 01 " + - "00 00"); - public void testCameraTool_bug45912() { - byte[] data = data45912; - RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, data); + /** + * taken from ftPictFmla sub-record in attachment 22645 (offset 0x40AB). + */ + byte[] data45912 = hr( + "12 00 0B 00 F8 02 88 04 3B 00 " + + "00 00 00 01 00 00 00 01 " + + "00 00"); + RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data45912); - EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data.length); + EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data45912.length); byte[] ser2 = rec.serialize(); - confirmData(data, ser2); + confirmData(data45912, ser2); } private static byte[] hr(String string) { @@ -134,14 +135,37 @@ public final class TestEmbeddedObjectRefSubRecord extends TestCase { } private static void confirmRead(byte[] data, int i) { - RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, data); + RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data); EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data.length); byte[] ser2 = rec.serialize(); - byte[] d2 = (byte[]) data.clone(); // remove sid+len for compare - System.arraycopy(ser2, 4, d2, 0, d2.length); - if (!Arrays.equals(data, d2)) { - fail("re-read NQR for case " + i); + TestcaseRecordInputStream.confirmRecordEncoding("Test record " + i, EORSR_SID, data, ser2); + } + + public void testVisioDrawing_bug46199() { + /** + * taken from ftPictFmla sub-record in attachment 22860 (stream offset 0x768F).
+ * Note that the since the string length is zero, there is no unicode flag byte + */ + byte[] data46199 = hr( + "0E 00 " + + "05 00 " + + "28 25 A3 01 " + + "02 6C D1 34 02 " + + "03 00 00 " + + "0F CB E8 00"); + RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data46199); + + EmbeddedObjectRefSubRecord rec; + try { + rec = new EmbeddedObjectRefSubRecord(in, data46199.length); + } catch (RecordFormatException e) { + if (e.getMessage().equals("Not enough data (3) to read requested (4) bytes")) { + throw new AssertionFailedError("Identified bug 22860"); + } + throw e; } + byte[] ser2 = rec.serialize(); + TestcaseRecordInputStream.confirmRecordEncoding(EORSR_SID, data46199, ser2); } } diff --git a/src/testcases/org/apache/poi/hssf/record/TestcaseRecordInputStream.java b/src/testcases/org/apache/poi/hssf/record/TestcaseRecordInputStream.java index 1bbc9d7a9..3fc80407e 100755 --- a/src/testcases/org/apache/poi/hssf/record/TestcaseRecordInputStream.java +++ b/src/testcases/org/apache/poi/hssf/record/TestcaseRecordInputStream.java @@ -21,7 +21,9 @@ import java.io.ByteArrayInputStream; import java.io.InputStream; import junit.framework.Assert; +import junit.framework.AssertionFailedError; +import org.apache.poi.util.HexDump; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianByteArrayInputStream; import org.apache.poi.util.LittleEndianInput; @@ -29,29 +31,29 @@ import org.apache.poi.util.LittleEndianInput; /** * A Record Input Stream derivative that makes access to byte arrays used in the * test cases work a bit easier. - *

Creates the sream and moves to the first record. + *

Creates the stream and moves to the first record. * * @author Jason Height (jheight at apache.org) */ public final class TestcaseRecordInputStream { - + private TestcaseRecordInputStream() { // no instances of this class } - + /** - * Prepends a mock record identifier to the supplied data and opens a record input stream + * Prepends a mock record identifier to the supplied data and opens a record input stream */ public static LittleEndianInput createLittleEndian(byte[] data) { return new LittleEndianByteArrayInputStream(data); - + } public static RecordInputStream create(int sid, byte[] data) { return create(mergeDataAndSid(sid, data.length, data)); } /** - * First 4 bytes of data are assumed to be record identifier and length. The supplied - * data can contain multiple records (sequentially encoded in the same way) + * First 4 bytes of data are assumed to be record identifier and length. The supplied + * data can contain multiple records (sequentially encoded in the same way) */ public static RecordInputStream create(byte[] data) { InputStream is = new ByteArrayInputStream(data); @@ -59,40 +61,45 @@ public final class TestcaseRecordInputStream { result.nextRecord(); return result; } - - /** - * Convenience constructor - */ -// public TestcaseRecordInputStream(int sid, byte[] data) -// { -// super(new ByteArrayInputStream(mergeDataAndSid(sid, data.length, data))); -// nextRecord(); -// } -// public TestcaseRecordInputStream(short sid, short length, byte[] data) -// { -// super(new ByteArrayInputStream(mergeDataAndSid(sid, length, data))); -// nextRecord(); -// } - public static byte[] mergeDataAndSid(int sid, int length, byte[] data) { - byte[] result = new byte[data.length + 4]; - LittleEndian.putUShort(result, 0, sid); - LittleEndian.putUShort(result, 2, length); - System.arraycopy(data, 0, result, 4, data.length); - return result; - } - /** - * Confirms data sections are equal - * @param expectedData - just raw data (without sid or size short ints) - * @param actualRecordBytes this includes 4 prefix bytes (sid & size) - */ - public static void confirmRecordEncoding(int expectedSid, byte[] expectedData, byte[] actualRecordBytes) { - int expectedDataSize = expectedData.length; - Assert.assertEquals(actualRecordBytes.length - 4, expectedDataSize); - Assert.assertEquals(expectedSid, LittleEndian.getShort(actualRecordBytes, 0)); - Assert.assertEquals(expectedDataSize, LittleEndian.getShort(actualRecordBytes, 2)); - for (int i = 0; i < expectedDataSize; i++) - Assert.assertEquals("At offset " + i, expectedData[i], actualRecordBytes[i+4]); - - } + public static byte[] mergeDataAndSid(int sid, int length, byte[] data) { + byte[] result = new byte[data.length + 4]; + LittleEndian.putUShort(result, 0, sid); + LittleEndian.putUShort(result, 2, length); + System.arraycopy(data, 0, result, 4, data.length); + return result; + } + /** + * Confirms data sections are equal + * @param expectedData - just raw data (without sid or size short ints) + * @param actualRecordBytes this includes 4 prefix bytes (sid & size) + */ + public static void confirmRecordEncoding(int expectedSid, byte[] expectedData, byte[] actualRecordBytes) + throws AssertionFailedError { + confirmRecordEncoding(null, expectedSid, expectedData, actualRecordBytes); + } + /** + * Confirms data sections are equal + * @param msgPrefix message prefix to be displayed in case of failure + * @param expectedData - just raw data (without ushort sid, ushort size) + * @param actualRecordBytes this includes 4 prefix bytes (sid & size) + */ + public static void confirmRecordEncoding(String msgPrefix, int expectedSid, byte[] expectedData, byte[] actualRecordBytes) + throws AssertionFailedError { + int expectedDataSize = expectedData.length; + Assert.assertEquals("Size of encode data mismatch", actualRecordBytes.length - 4, expectedDataSize); + Assert.assertEquals(expectedSid, LittleEndian.getShort(actualRecordBytes, 0)); + Assert.assertEquals(expectedDataSize, LittleEndian.getShort(actualRecordBytes, 2)); + for (int i = 0; i < expectedDataSize; i++) + if (expectedData[i] != actualRecordBytes[i+4]) { + StringBuilder sb = new StringBuilder(64); + if (msgPrefix != null) { + sb.append(msgPrefix).append(": "); + } + sb.append("At offset ").append(i); + sb.append(": expected ").append(HexDump.byteToHex(expectedData[i])); + sb.append(" but found ").append(HexDump.byteToHex(actualRecordBytes[i+4])); + throw new AssertionFailedError(sb.toString()); + } + } }