Fix for bug 46199 - tweaks to EmbeddedObjectRefSubRecord

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@713852 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-11-13 23:01:46 +00:00
parent 9873ea6391
commit 41ad7b2410
5 changed files with 125 additions and 93 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.5-beta4" date="2008-??-??"> <release version="3.5-beta4" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">46199 - More tweaks to EmbeddedObjectRefSubRecord</header>
<action dev="POI-DEVELOPERS" type="add">Changes to formula evaluation allowing for reduced memory usage</action> <action dev="POI-DEVELOPERS" type="add">Changes to formula evaluation allowing for reduced memory usage</action>
<action dev="POI-DEVELOPERS" type="fix">45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list</header> <action dev="POI-DEVELOPERS" type="fix">45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list</header>
<action dev="POI-DEVELOPERS" type="fix">46184 - More odd escaped date formats</action> <action dev="POI-DEVELOPERS" type="fix">46184 - More odd escaped date formats</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.5-beta4" date="2008-??-??"> <release version="3.5-beta4" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">46199 - More tweaks to EmbeddedObjectRefSubRecord</header>
<action dev="POI-DEVELOPERS" type="add">Changes to formula evaluation allowing for reduced memory usage</action> <action dev="POI-DEVELOPERS" type="add">Changes to formula evaluation allowing for reduced memory usage</action>
<action dev="POI-DEVELOPERS" type="fix">45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list</header> <action dev="POI-DEVELOPERS" type="fix">45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list</header>
<action dev="POI-DEVELOPERS" type="fix">46184 - More odd escaped date formats</action> <action dev="POI-DEVELOPERS" type="fix">46184 - More odd escaped date formats</action>

View File

@ -46,9 +46,9 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
private int field_1_unknown_int; private int field_1_unknown_int;
/** either an area or a cell ref */ /** either an area or a cell ref */
private Ptg field_2_refPtg; private Ptg field_2_refPtg;
/** for when the 'formula' doesn't parse properly */
private byte[] field_2_unknownFormulaData; private byte[] field_2_unknownFormulaData;
// TODO: Consider making a utility class for these. I've discovered the same field ordering /** note- this byte is not present in the encoding if the string length is zero */
// in FormatRecord and StringRecord, it may be elsewhere too.
private boolean field_3_unicode_flag; // Flags whether the string is Unicode. private boolean field_3_unicode_flag; // Flags whether the string is Unicode.
private String field_4_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8) private String field_4_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8)
/** Formulas often have a single non-zero trailing byte. /** Formulas often have a single non-zero trailing byte.
@ -72,7 +72,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
} }
public EmbeddedObjectRefSubRecord(LittleEndianInput in, int size) { public EmbeddedObjectRefSubRecord(LittleEndianInput in, int size) {
// TODO use 'size' param
// Much guess-work going on here due to lack of any documentation. // Much guess-work going on here due to lack of any documentation.
// See similar source code in OOO: // See similar source code in OOO:
// http://lxr.go-oo.org/source/sc/sc/source/filter/excel/xiescher.cxx // http://lxr.go-oo.org/source/sc/sc/source/filter/excel/xiescher.cxx
@ -101,26 +101,25 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
int stringByteCount; int stringByteCount;
if (remaining >= dataLenAfterFormula + 3) { if (remaining >= dataLenAfterFormula + 3) {
int tag = in.readByte(); int tag = in.readByte();
remaining -= LittleEndian.BYTE_SIZE; stringByteCount = LittleEndian.BYTE_SIZE;
if (tag != 0x03) { if (tag != 0x03) {
throw new RecordFormatException("Expected byte 0x03 here"); throw new RecordFormatException("Expected byte 0x03 here");
} }
int nChars = in.readUShort(); int nChars = in.readUShort();
remaining -= LittleEndian.SHORT_SIZE; stringByteCount += LittleEndian.SHORT_SIZE;
if (nChars > 0) { if (nChars > 0) {
// OOO: the 4th way Xcl stores a unicode string: not even a Grbit byte present if length 0 // OOO: the 4th way Xcl stores a unicode string: not even a Grbit byte present if length 0
field_3_unicode_flag = ( in.readByte() & 0x01 ) != 0; field_3_unicode_flag = ( in.readByte() & 0x01 ) != 0;
remaining -= LittleEndian.BYTE_SIZE; stringByteCount += LittleEndian.BYTE_SIZE;
if (field_3_unicode_flag) { if (field_3_unicode_flag) {
field_4_ole_classname = StringUtil.readUnicodeLE(in, nChars); field_4_ole_classname = StringUtil.readUnicodeLE(in, nChars);
stringByteCount = nChars * 2; stringByteCount += nChars * 2;
} else { } else {
field_4_ole_classname = StringUtil.readCompressedUnicode(in, nChars); field_4_ole_classname = StringUtil.readCompressedUnicode(in, nChars);
stringByteCount = nChars; stringByteCount += nChars;
} }
} else { } else {
field_4_ole_classname = ""; field_4_ole_classname = "";
stringByteCount = 0;
} }
} else { } else {
field_4_ole_classname = null; field_4_ole_classname = null;
@ -150,10 +149,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
} else { } else {
field_5_stream_id = null; field_5_stream_id = null;
} }
field_6_unknown = readRawData(in, remaining);
byte [] buf = new byte[remaining];
in.readFully(buf);
field_6_unknown = buf;
} }
private static Ptg readRefPtg(byte[] formulaRawBytes) { private static Ptg readRefPtg(byte[] formulaRawBytes) {
@ -176,9 +172,7 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
return EMPTY_BYTE_ARRAY; return EMPTY_BYTE_ARRAY;
} }
byte[] result = new byte[size]; byte[] result = new byte[size];
for(int i=0; i< size; i++) { in.readFully(result);
result[i] = in.readByte();
}
return result; return result;
} }
@ -191,12 +185,15 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
// don't write 0x03, stringLen, flag, text // don't write 0x03, stringLen, flag, text
stringLen = 0; stringLen = 0;
} else { } else {
result += 1 + 2 + 1; // 0x03, stringLen, flag result += 1 + 2; // 0x03, stringLen
stringLen = field_4_ole_classname.length(); stringLen = field_4_ole_classname.length();
if (field_3_unicode_flag) { if (stringLen > 0) {
result += stringLen * 2; result += 1; // flag
} else { if (field_3_unicode_flag) {
result += stringLen; result += stringLen * 2;
} else {
result += stringLen;
}
} }
} }
// pad to next 2 byte boundary // pad to next 2 byte boundary
@ -253,15 +250,17 @@ public final class EmbeddedObjectRefSubRecord extends SubRecord {
stringLen = field_4_ole_classname.length(); stringLen = field_4_ole_classname.length();
out.writeShort(stringLen); out.writeShort(stringLen);
pos+=2; pos+=2;
out.writeByte(field_3_unicode_flag ? 0x01 : 0x00); if (stringLen > 0) {
pos+=1; out.writeByte(field_3_unicode_flag ? 0x01 : 0x00);
pos+=1;
if (field_3_unicode_flag) {
StringUtil.putUnicodeLE(field_4_ole_classname, out); if (field_3_unicode_flag) {
pos += stringLen * 2; StringUtil.putUnicodeLE(field_4_ole_classname, out);
} else { pos += stringLen * 2;
StringUtil.putCompressedUnicode(field_4_ole_classname, out); } else {
pos += stringLen; StringUtil.putCompressedUnicode(field_4_ole_classname, out);
pos += stringLen;
}
} }
} }

View File

@ -33,14 +33,18 @@ import org.apache.poi.util.HexRead;
*/ */
public final class TestEmbeddedObjectRefSubRecord extends TestCase { public final class TestEmbeddedObjectRefSubRecord extends TestCase {
String data1 = "[20, 00, 05, 00, FC, 10, 76, 01, 02, 24, 14, DF, 00, 03, 10, 00, 00, 46, 6F, 72, 6D, 73, 2E, 43, 68, 65, 63, 6B, 42, 6F, 78, 2E, 31, 00, 00, 00, 00, 00, 70, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, ]"; private static final short EORSR_SID = EmbeddedObjectRefSubRecord.sid;
public void testStore() { public void testStore() {
String data1
= "20 00 05 00 FC 10 76 01 02 24 14 DF 00 03 10 00 "
+ "00 46 6F 72 6D 73 2E 43 68 65 63 6B 42 6F 78 2E "
+ "31 00 00 00 00 00 70 00 00 00 00 00 00 00 00 00 "
+ "00 00";
byte[] src = hr(data1); byte[] src = hr(data1);
// src = TestcaseRecordInputStream.mergeDataAndSid(EmbeddedObjectRefSubRecord.sid, (short)src.length, src);
RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, src); RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, src);
EmbeddedObjectRefSubRecord record1 = new EmbeddedObjectRefSubRecord(in, src.length); EmbeddedObjectRefSubRecord record1 = new EmbeddedObjectRefSubRecord(in, src.length);
@ -84,22 +88,19 @@ public final class TestEmbeddedObjectRefSubRecord extends TestCase {
assertTrue(Arrays.equals(ser, ser2)); assertTrue(Arrays.equals(ser, ser2));
} }
/**
* taken from ftPictFmla sub-record in attachment 22645 (offset 0x40AB).
*/
private static final byte[] data45912 = hr(
"12 00 0B 00 F8 02 88 04 3B 00 " +
"00 00 00 01 00 00 00 01 " +
"00 00");
public void testCameraTool_bug45912() { public void testCameraTool_bug45912() {
byte[] data = data45912; /**
RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, data); * taken from ftPictFmla sub-record in attachment 22645 (offset 0x40AB).
*/
byte[] data45912 = hr(
"12 00 0B 00 F8 02 88 04 3B 00 " +
"00 00 00 01 00 00 00 01 " +
"00 00");
RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data45912);
EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data.length); EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data45912.length);
byte[] ser2 = rec.serialize(); byte[] ser2 = rec.serialize();
confirmData(data, ser2); confirmData(data45912, ser2);
} }
private static byte[] hr(String string) { private static byte[] hr(String string) {
@ -134,14 +135,37 @@ public final class TestEmbeddedObjectRefSubRecord extends TestCase {
} }
private static void confirmRead(byte[] data, int i) { private static void confirmRead(byte[] data, int i) {
RecordInputStream in = TestcaseRecordInputStream.create(EmbeddedObjectRefSubRecord.sid, data); RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data);
EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data.length); EmbeddedObjectRefSubRecord rec = new EmbeddedObjectRefSubRecord(in, data.length);
byte[] ser2 = rec.serialize(); byte[] ser2 = rec.serialize();
byte[] d2 = (byte[]) data.clone(); // remove sid+len for compare TestcaseRecordInputStream.confirmRecordEncoding("Test record " + i, EORSR_SID, data, ser2);
System.arraycopy(ser2, 4, d2, 0, d2.length); }
if (!Arrays.equals(data, d2)) {
fail("re-read NQR for case " + i); public void testVisioDrawing_bug46199() {
/**
* taken from ftPictFmla sub-record in attachment 22860 (stream offset 0x768F).<br/>
* Note that the since the string length is zero, there is no unicode flag byte
*/
byte[] data46199 = hr(
"0E 00 "
+ "05 00 "
+ "28 25 A3 01 "
+ "02 6C D1 34 02 "
+ "03 00 00 "
+ "0F CB E8 00");
RecordInputStream in = TestcaseRecordInputStream.create(EORSR_SID, data46199);
EmbeddedObjectRefSubRecord rec;
try {
rec = new EmbeddedObjectRefSubRecord(in, data46199.length);
} catch (RecordFormatException e) {
if (e.getMessage().equals("Not enough data (3) to read requested (4) bytes")) {
throw new AssertionFailedError("Identified bug 22860");
}
throw e;
} }
byte[] ser2 = rec.serialize();
TestcaseRecordInputStream.confirmRecordEncoding(EORSR_SID, data46199, ser2);
} }
} }

View File

@ -21,7 +21,9 @@ import java.io.ByteArrayInputStream;
import java.io.InputStream; import java.io.InputStream;
import junit.framework.Assert; import junit.framework.Assert;
import junit.framework.AssertionFailedError;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianByteArrayInputStream; import org.apache.poi.util.LittleEndianByteArrayInputStream;
import org.apache.poi.util.LittleEndianInput; import org.apache.poi.util.LittleEndianInput;
@ -29,29 +31,29 @@ import org.apache.poi.util.LittleEndianInput;
/** /**
* A Record Input Stream derivative that makes access to byte arrays used in the * A Record Input Stream derivative that makes access to byte arrays used in the
* test cases work a bit easier. * test cases work a bit easier.
* <p> Creates the sream and moves to the first record. * <p> Creates the stream and moves to the first record.
* *
* @author Jason Height (jheight at apache.org) * @author Jason Height (jheight at apache.org)
*/ */
public final class TestcaseRecordInputStream { public final class TestcaseRecordInputStream {
private TestcaseRecordInputStream() { private TestcaseRecordInputStream() {
// no instances of this class // no instances of this class
} }
/** /**
* Prepends a mock record identifier to the supplied data and opens a record input stream * Prepends a mock record identifier to the supplied data and opens a record input stream
*/ */
public static LittleEndianInput createLittleEndian(byte[] data) { public static LittleEndianInput createLittleEndian(byte[] data) {
return new LittleEndianByteArrayInputStream(data); return new LittleEndianByteArrayInputStream(data);
} }
public static RecordInputStream create(int sid, byte[] data) { public static RecordInputStream create(int sid, byte[] data) {
return create(mergeDataAndSid(sid, data.length, data)); return create(mergeDataAndSid(sid, data.length, data));
} }
/** /**
* First 4 bytes of <tt>data</tt> are assumed to be record identifier and length. The supplied * First 4 bytes of <tt>data</tt> are assumed to be record identifier and length. The supplied
* <tt>data</tt> can contain multiple records (sequentially encoded in the same way) * <tt>data</tt> can contain multiple records (sequentially encoded in the same way)
*/ */
public static RecordInputStream create(byte[] data) { public static RecordInputStream create(byte[] data) {
InputStream is = new ByteArrayInputStream(data); InputStream is = new ByteArrayInputStream(data);
@ -59,40 +61,45 @@ public final class TestcaseRecordInputStream {
result.nextRecord(); result.nextRecord();
return result; return result;
} }
/**
* Convenience constructor
*/
// public TestcaseRecordInputStream(int sid, byte[] data)
// {
// super(new ByteArrayInputStream(mergeDataAndSid(sid, data.length, data)));
// nextRecord();
// }
// public TestcaseRecordInputStream(short sid, short length, byte[] data)
// {
// super(new ByteArrayInputStream(mergeDataAndSid(sid, length, data)));
// nextRecord();
// }
public static byte[] mergeDataAndSid(int sid, int length, byte[] data) { public static byte[] mergeDataAndSid(int sid, int length, byte[] data) {
byte[] result = new byte[data.length + 4]; byte[] result = new byte[data.length + 4];
LittleEndian.putUShort(result, 0, sid); LittleEndian.putUShort(result, 0, sid);
LittleEndian.putUShort(result, 2, length); LittleEndian.putUShort(result, 2, length);
System.arraycopy(data, 0, result, 4, data.length); System.arraycopy(data, 0, result, 4, data.length);
return result; return result;
} }
/** /**
* Confirms data sections are equal * Confirms data sections are equal
* @param expectedData - just raw data (without sid or size short ints) * @param expectedData - just raw data (without sid or size short ints)
* @param actualRecordBytes this includes 4 prefix bytes (sid & size) * @param actualRecordBytes this includes 4 prefix bytes (sid & size)
*/ */
public static void confirmRecordEncoding(int expectedSid, byte[] expectedData, byte[] actualRecordBytes) { public static void confirmRecordEncoding(int expectedSid, byte[] expectedData, byte[] actualRecordBytes)
int expectedDataSize = expectedData.length; throws AssertionFailedError {
Assert.assertEquals(actualRecordBytes.length - 4, expectedDataSize); confirmRecordEncoding(null, expectedSid, expectedData, actualRecordBytes);
Assert.assertEquals(expectedSid, LittleEndian.getShort(actualRecordBytes, 0)); }
Assert.assertEquals(expectedDataSize, LittleEndian.getShort(actualRecordBytes, 2)); /**
for (int i = 0; i < expectedDataSize; i++) * Confirms data sections are equal
Assert.assertEquals("At offset " + i, expectedData[i], actualRecordBytes[i+4]); * @param msgPrefix message prefix to be displayed in case of failure
* @param expectedData - just raw data (without ushort sid, ushort size)
} * @param actualRecordBytes this includes 4 prefix bytes (sid & size)
*/
public static void confirmRecordEncoding(String msgPrefix, int expectedSid, byte[] expectedData, byte[] actualRecordBytes)
throws AssertionFailedError {
int expectedDataSize = expectedData.length;
Assert.assertEquals("Size of encode data mismatch", actualRecordBytes.length - 4, expectedDataSize);
Assert.assertEquals(expectedSid, LittleEndian.getShort(actualRecordBytes, 0));
Assert.assertEquals(expectedDataSize, LittleEndian.getShort(actualRecordBytes, 2));
for (int i = 0; i < expectedDataSize; i++)
if (expectedData[i] != actualRecordBytes[i+4]) {
StringBuilder sb = new StringBuilder(64);
if (msgPrefix != null) {
sb.append(msgPrefix).append(": ");
}
sb.append("At offset ").append(i);
sb.append(": expected ").append(HexDump.byteToHex(expectedData[i]));
sb.append(" but found ").append(HexDump.byteToHex(actualRecordBytes[i+4]));
throw new AssertionFailedError(sb.toString());
}
}
} }