From 177caeec53dafb56c092d68df1ef769118be7c04 Mon Sep 17 00:00:00 2001
From: Yegor Kozlov
Date: Sat, 8 Sep 2007 16:34:10 +0000
Subject: [PATCH] Support for getting OLE objects from HSSFWorkbook. See bug 43222 for details.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@573878 13f79535-47bb-0310-9956-ffa450edef68
---
 .../record/EmbeddedObjectRefSubRecord.java    | 216 ++++++++++++++++++
 .../org/apache/poi/hssf/record/SubRecord.java |   3 +
 .../poi/hssf/usermodel/HSSFObjectData.java    |  90 +++++++++
 .../poi/hssf/usermodel/HSSFWorkbook.java      |  47 ++++-
 src/java/org/apache/poi/util/HexDump.java     |  19 ++
 .../poi/hssf/usermodel/TestOLE2Embeding.java  |  42 ++--
 6 files changed, 404 insertions(+), 13 deletions(-)
 create mode 100644 src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java
 create mode 100644 src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java

diff --git a/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java b/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java
new file mode 100644
index 000000000..9a9719b0d
--- /dev/null
+++ b/src/java/org/apache/poi/hssf/record/EmbeddedObjectRefSubRecord.java
@@ -0,0 +1,216 @@
+
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hssf.record;
+
+
+
+import org.apache.poi.util.*;
+
+/**
+ * A sub-record within the OBJ record which stores a reference to an object
+ * stored in a separate entry within the OLE2 compound file.
+ *
+ * @author Daniel Noll
+ */
+public class EmbeddedObjectRefSubRecord
+    extends SubRecord
+{
+    public static final short sid = 0x9;
+
+    public short field_1_stream_id_offset; // Offset to stream ID from the point after this value.
+    public short[] field_2_unknown; // Unknown stuff at the front. TODO: Confirm that it's a short[]
+    // TODO: Consider making a utility class for these. I've discovered the same field ordering
+    // in FormatRecord and StringRecord, it may be elsewhere too.
+    public short field_3_unicode_len; // Length of Unicode string.
+    public boolean field_4_unicode_flag; // Flags whether the string is Unicode.
+    public String field_5_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8)
+    public int field_6_stream_id; // ID of the OLE stream containing the actual data.
+
+    /**
+     * Creates a record without reading any data; no field is assigned here.
+     */
+    public EmbeddedObjectRefSubRecord()
+    {
+    }
+
+    /**
+     * Constructs an EmbeddedObjectRef record and sets its fields appropriately.
+     *
+     * @param in the record input stream.
+     */
+    public EmbeddedObjectRefSubRecord(RecordInputStream in)
+    {
+        super(in);
+    }
+
+    /**
+     * Checks the sid matches the expected sid for this record
+     *
+     * @param id the expected sid.
+     * @throws RecordFormatException if the id is not {@link #sid}.
+     */
+    protected void validateSid(short id)
+    {
+        if (id != sid)
+        {
+            throw new RecordFormatException("Not a EmbeddedObjectRef record");
+        }
+    }
+
+    /**
+     * @return the sid of this sub-record, {@link #sid}.
+     */
+    public short getSid()
+    {
+        return sid;
+    }
+
+    /**
+     * Reads the fields in declaration order, then skips the padding that
+     * precedes the stream ID.
+     *
+     * @param in the record input stream.
+     */
+    protected void fillFields(RecordInputStream in)
+    {
+        field_1_stream_id_offset = in.readShort();
+        field_2_unknown = in.readShortArray();
+        field_3_unicode_len = in.readShort();
+        field_4_unicode_flag = ( in.readByte() & 0x01 ) != 0;
+
+        if ( field_4_unicode_flag )
+        {
+            field_5_ole_classname = in.readUnicodeLEString( field_3_unicode_len );
+        }
+        else
+        {
+            field_5_ole_classname = in.readCompressedUnicode( field_3_unicode_len );
+        }
+
+        // Padded with NUL bytes. The -2 is because field_1_stream_id_offset
+        // is relative to after the offset field, whereas in.getRecordOffset()
+        // is relative to the start of this record.
+        while (in.getRecordOffset() - 2 < field_1_stream_id_offset)
+        {
+            in.readByte(); // discard
+        }
+
+        field_6_stream_id = in.readInt();
+    }
+
+    /**
+     * Writes the fields in the order fillFields reads them.  No 4 byte
+     * header is written.
+     *
+     * @param offset index in data at which this sub-record starts.
+     * @param data   the buffer to write into.
+     * @return the value of {@link #getRecordSize()}.
+     */
+    public int serialize(int offset, byte[] data)
+    {
+        int pos = offset;
+
+        LittleEndian.putShort(data, pos, field_1_stream_id_offset); pos += 2;
+        LittleEndian.putShortArray(data, pos, field_2_unknown); pos += field_2_unknown.length * 2 + 2;
+        LittleEndian.putShort(data, pos, field_3_unicode_len); pos += 2;
+        data[pos] = field_4_unicode_flag ? (byte) 0x01 : (byte) 0x00; pos++;
+
+        if ( field_4_unicode_flag )
+        {
+            StringUtil.putUnicodeLE( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length() * 2;
+        }
+        else
+        {
+            StringUtil.putCompressedUnicode( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length();
+        }
+
+        // Padded with NUL bytes.  The stream ID offset is relative to after the
+        // 2 byte offset field, and everything is relative to offset in data.
+        int streamIdPos = offset + 2 + field_1_stream_id_offset;
+        while (pos < streamIdPos)
+        {
+            data[pos++] = 0;
+        }
+
+        LittleEndian.putInt(data, streamIdPos, field_6_stream_id);
+
+        return getRecordSize();
+    }
+
+    /**
+     * Size of record (excluding 4 byte header)
+     */
+    public int getRecordSize()
+    {
+        // 2 byte offset field, the bytes that field counts, then the 4 byte stream ID.
+        // NOTE(review): confirm whether sibling sub-records count their header here.
+        return 2 + field_1_stream_id_offset + 4;
+    }
+
+    /**
+     * Gets the stream ID containing the actual data. The data itself
+     * can be found under a top-level directory entry in the OLE2 filesystem
+     * under the name "MBDxxxxxxxx" where xxxxxxxx is
+     * this ID converted into hex (in big endian order, funnily enough.)
+     *
+     * @return the data stream ID.
+     */
+    public int getStreamId()
+    {
+        return field_6_stream_id;
+    }
+
+    /**
+     * @return a multi-line dump of every field, for debugging.
+     */
+    public String toString()
+    {
+        String nl = System.getProperty("line.separator");
+        StringBuffer buffer = new StringBuffer();
+        buffer.append("[ftPictFmla]\n");
+        buffer.append(" .streamIdOffset = ")
+            .append("0x").append(HexDump.toHex( field_1_stream_id_offset ))
+            .append(" (").append( field_1_stream_id_offset ).append(" )")
+            .append(nl);
+        // A short[] has no useful decimal rendering (appending it would only
+        // print its identity), so only the hex dump is shown.
+        buffer.append(" .unknown = ")
+            .append("0x").append(HexDump.toHex( field_2_unknown ))
+            .append(nl);
+        buffer.append(" .unicodeLen = ")
+            .append("0x").append(HexDump.toHex( field_3_unicode_len ))
+            .append(" (").append( field_3_unicode_len ).append(" )")
+            .append(nl);
+        buffer.append(" .unicodeFlag = ")
+            .append("0x").append( field_4_unicode_flag ? 0x01 : 0x00 )
+            .append(" (").append( field_4_unicode_flag ).append(" )")
+            .append(nl);
+        buffer.append(" .oleClassname = ")
+            .append(field_5_ole_classname)
+            .append(nl);
+        buffer.append(" .streamId = ")
+            .append("0x").append(HexDump.toHex( field_6_stream_id ))
+            .append(" (").append( field_6_stream_id ).append(" )")
+            .append(nl);
+        buffer.append("[/ftPictFmla]");
+        return buffer.toString();
+    }
+
+}
diff --git a/src/java/org/apache/poi/hssf/record/SubRecord.java b/src/java/org/apache/poi/hssf/record/SubRecord.java
index 944c671d6..6b836c6ca 100644
--- a/src/java/org/apache/poi/hssf/record/SubRecord.java
+++ b/src/java/org/apache/poi/hssf/record/SubRecord.java
@@ -58,6 +58,9 @@ abstract public class SubRecord
             case CommonObjectDataSubRecord.sid:
                 r = new CommonObjectDataSubRecord( in );
                 break;
+            case EmbeddedObjectRefSubRecord.sid:
+                r = new EmbeddedObjectRefSubRecord( in );
+                break;
             case GroupMarkerSubRecord.sid:
                 r = new GroupMarkerSubRecord( in );
                 break;
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java b/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java
new file mode 100644
index 000000000..b1c5c66e0
--- /dev/null
+++ b/src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java
@@ -0,0 +1,90 @@
+/* ====================================================================
+   Copyright 2002-2004 Apache Software Foundation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hssf.usermodel;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord;
+import org.apache.poi.hssf.record.ObjRecord;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.HexDump;
+
+/**
+ * Represents binary object (i.e. OLE) data stored in the file, e.g. a GIF or JPEG.
+ *
+ * @author Daniel Noll
+ */
+public class HSSFObjectData
+{
+    /**
+     * The low-level record that ultimately refers to the embedded object.
+     */
+    private ObjRecord record;
+
+    /**
+     * The filesystem the object data is retrieved from.
+     */
+    private POIFSFileSystem poifs;
+
+    /**
+     * Wraps a lower level object record as object data.
+     *
+     * @param record the low-level object record.
+     * @param poifs the filesystem, required for retrieving the object data.
+     */
+    public HSSFObjectData(ObjRecord record, POIFSFileSystem poifs)
+    {
+        this.poifs = poifs;
+        this.record = record;
+    }
+
+    /**
+     * Gets the object data.  The directory is looked up in the filesystem
+     * root under the name "MBD" followed by the stream ID in hex, using the
+     * first embedded object reference found in the record.
+     *
+     * @return the object data as an OLE2 directory.
+     * @throws IOException if there was an error reading the data, or the entry is not a directory.
+     * @throws IllegalStateException if the record holds no embedded object reference.
+     */
+    public DirectoryEntry getDirectory() throws IOException
+    {
+        for (Iterator it = record.getSubRecords().iterator(); it.hasNext(); )
+        {
+            Object candidate = it.next();
+            if (!(candidate instanceof EmbeddedObjectRefSubRecord))
+            {
+                continue;
+            }
+
+            EmbeddedObjectRefSubRecord ref = (EmbeddedObjectRefSubRecord) candidate;
+            String streamName = "MBD" + HexDump.toHex(ref.getStreamId());
+            Entry entry = poifs.getRoot().getEntry(streamName);
+            if (!(entry instanceof DirectoryEntry))
+            {
+                throw new IOException("Stream " + streamName + " was not an OLE2 directory");
+            }
+            return (DirectoryEntry) entry;
+        }
+
+        throw new IllegalStateException("Object data does not contain a reference to an embedded object OLE2 directory");
+    }
+}
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
index 6e811280f..75377728a 100644
--- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
+++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
@@ -208,7 +208,7 @@ public class HSSFWorkbook
         setPropertiesFromWorkbook(workbook);
         int recOffset = workbook.getNumRecords();
         int sheetNum = 0;
-        
+
         // convert all LabelRecord records to LabelSSTRecord
         convertLabelRecords(records, recOffset);
         while (recOffset < records.size())
@@ -1332,6 +1332,7 @@ public class HSSFWorkbook
      */
     public List getAllPictures()
     {
+        // The drawing group record always exists at the top level, so we won't need to do this recursively.
         List pictures = new ArrayList();
         Iterator recordIter = workbook.getRecords().iterator();
         while (recordIter.hasNext())
@@ -1395,6 +1396,50 @@ public class HSSFWorkbook
         this.workbook.unwriteProtectWorkbook();
     }
 
+    /**
+     * Gets all embedded OLE2 objects from the Workbook.
+     *
+     * @return the list of embedded objects (a list of {@link HSSFObjectData} objects.)
+     */
+    public List getAllEmbeddedObjects()
+    {
+        List objects = new ArrayList();
+        for (int i = 0; i < getNumberOfSheets(); i++)
+        {
+            getAllEmbeddedObjects(getSheetAt(i).getSheet().getRecords(), objects);
+        }
+        return objects;
+    }
+
+    /**
+     * Collects the embedded OLE2 objects referenced by a list of records.
+     *
+     * @param records the list of records to search.
+     * @param objects the list of embedded objects to populate.
+     */
+    private void getAllEmbeddedObjects(List records, List objects)
+    {
+        for (Iterator recordIter = records.iterator(); recordIter.hasNext(); )
+        {
+            Object obj = recordIter.next();
+            if (obj instanceof ObjRecord)
+            {
+                // TODO: More convenient way of determining if there is stored binary.
+                // TODO: Link to the data stored in the other stream.
+                Iterator subRecordIter = ((ObjRecord) obj).getSubRecords().iterator();
+                while (subRecordIter.hasNext())
+                {
+                    if (subRecordIter.next() instanceof EmbeddedObjectRefSubRecord)
+                    {
+                        // One entry per OBJ record, however many references it holds.
+                        objects.add(new HSSFObjectData((ObjRecord) obj, poifs));
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
     private byte[] newUID()
     {
         byte[] bytes = new byte[16];
diff --git a/src/java/org/apache/poi/util/HexDump.java b/src/java/org/apache/poi/util/HexDump.java
index a6e23bdd7..aaea9d57a 100644
--- a/src/java/org/apache/poi/util/HexDump.java
+++ b/src/java/org/apache/poi/util/HexDump.java
@@ -268,6 +268,25 @@ public class HexDump
         return retVal.toString();
     }
 
+    /**
+     * Converts the parameter to a hex value.
+     *
+     * @param value The value to convert
+     * @return A String representing the array of shorts, the elements
+     *         separated by ", " and enclosed in square brackets
+     */
+    public static String toHex(final short[] value)
+    {
+        StringBuffer retVal = new StringBuffer();
+        retVal.append('[');
+        for(int x = 0; x < value.length; x++)
+        {
+            retVal.append(x == 0 ? "" : ", ").append(toHex(value[x]));
+        }
+        retVal.append(']');
+        return retVal.toString();
+    }
+
     /**
      *

Converts the parameter to a hex value breaking the results into * lines.

diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java b/src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java
index e0828c71c..dd5753130 100644
--- a/src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java
+++ b/src/testcases/org/apache/poi/hssf/usermodel/TestOLE2Embeding.java
@@ -19,25 +19,47 @@ package org.apache.poi.hssf.usermodel;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.util.List;
 
 import junit.framework.TestCase;
 
 public class TestOLE2Embeding extends TestCase {
-	public void testEmbeding() throws Exception {
-		String dirname = System.getProperty("HSSF.testdata.path");
-		String filename = dirname + "/ole2-embedding.xls";
+    public void testEmbeding() throws Exception {
+        String dirname = System.getProperty("HSSF.testdata.path");
+        String filename = dirname + "/ole2-embedding.xls";
 
-		File file = new File(filename);
-		FileInputStream in = new FileInputStream(file);
-		HSSFWorkbook workbook;
+        File file = new File(filename);
+        FileInputStream in = new FileInputStream(file);
+        HSSFWorkbook workbook;
 
-		// This used to break, until bug #43116 was fixed
-		workbook = new HSSFWorkbook(in);
+        // This used to break, until bug #43116 was fixed
+        workbook = new HSSFWorkbook(in);
 
-		in.close();
+        in.close();
+
+        // Check we can get at the Escher layer still
+        workbook.getAllPictures();
+    }
+
+    /**
+     * Checks that both embedded objects of the sample file are found under their expected directory names.
+     */
+    public void testEmbeddedObjects() throws Exception {
+        String dirname = System.getProperty("HSSF.testdata.path");
+        FileInputStream in = new FileInputStream(new File(dirname + "/ole2-embedding.xls"));
+        HSSFWorkbook workbook;
+        try {
+            workbook = new HSSFWorkbook(in);
+        } finally {
+            in.close(); // do not leak the file handle, even when parsing fails
+        }
+        List objects = workbook.getAllEmbeddedObjects();
+        assertEquals("Wrong number of objects", 2, objects.size());
+        assertEquals("Wrong name for first object", "MBD06CAB431",
+                ((HSSFObjectData) objects.get(0)).getDirectory().getName());
+        assertEquals("Wrong name for second object", "MBD06CAC85A",
+                ((HSSFObjectData) objects.get(1)).getDirectory().getName());
+    }
+
 
-		// Check we can get at the Escher layer still
-		workbook.getAllPictures();
-	}
 }