Support for getting OLE objects from HSSFWorkbook. See bug 43222 for details.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@573878 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2007-09-08 16:34:10 +00:00
parent a82e65ea7b
commit 177caeec53
6 changed files with 372 additions and 13 deletions

View File

@ -0,0 +1,184 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import org.apache.poi.util.*;
/**
* A sub-record within the OBJ record which stores a reference to an object
* stored in a separate entry within the OLE2 compound file.
*
* @author Daniel Noll
*/
public class EmbeddedObjectRefSubRecord
    extends SubRecord
{
    public static final short sid = 0x9;

    /** Offset to the stream ID, counted from the byte immediately after this value. */
    public short   field_1_stream_id_offset;
    /** Unknown stuff at the front. TODO: Confirm that it's a short[] */
    public short[] field_2_unknown;

    // TODO: Consider making a utility class for these.  I've discovered the same field ordering
    //       in FormatRecord and StringRecord, it may be elsewhere too.
    /** Length of the class name string, in characters. */
    public short   field_3_unicode_len;
    /** Flags whether the class name is stored as 16-bit Unicode (true) or compressed (false). */
    public boolean field_4_unicode_flag;
    /** Classname of the embedded OLE document (e.g. Word.Document.8) */
    public String  field_5_ole_classname;
    /** ID of the OLE stream containing the actual data. */
    public int     field_6_stream_id;

    /**
     * Creates an empty record; all fields keep their Java default values.
     */
    public EmbeddedObjectRefSubRecord()
    {
    }

    /**
     * Constructs an EmbeddedObjectRef record and sets its fields appropriately.
     *
     * @param in the record input stream.
     */
    public EmbeddedObjectRefSubRecord(RecordInputStream in)
    {
        super(in);
    }

    /**
     * Checks the sid matches the expected sid for this record
     *
     * @param id the expected sid.
     * @throws RecordFormatException if <code>id</code> differs from {@link #sid}.
     */
    protected void validateSid(short id)
    {
        if (id != sid)
        {
            throw new RecordFormatException("Not a EmbeddedObjectRef record");
        }
    }

    /**
     * @return the sub-record identifier, {@link #sid}.
     */
    public short getSid()
    {
        return sid;
    }

    /**
     * Reads the fields from the stream in declaration order, skips the padding
     * that precedes the stream ID and finally reads the stream ID itself.
     *
     * @param in the record input stream.
     */
    protected void fillFields(RecordInputStream in)
    {
        field_1_stream_id_offset = in.readShort();
        field_2_unknown          = in.readShortArray();
        field_3_unicode_len      = in.readShort();
        field_4_unicode_flag     = ( in.readByte() & 0x01 ) != 0;

        if ( field_4_unicode_flag )
        {
            field_5_ole_classname = in.readUnicodeLEString( field_3_unicode_len );
        }
        else
        {
            field_5_ole_classname = in.readCompressedUnicode( field_3_unicode_len );
        }

        // Padded with NUL bytes.  The -2 is because field_1_stream_id_offset
        // is relative to after the offset field, whereas in.getRecordOffset()
        // is relative to the start of this record.
        while (in.getRecordOffset() - 2 < field_1_stream_id_offset)
        {
            in.readByte(); // discard
        }

        field_6_stream_id = in.readInt();
    }

    /**
     * Writes the fields into <code>data</code> starting at <code>offset</code>,
     * mirroring the layout read by {@link #fillFields(RecordInputStream)}:
     * the 2 byte offset field, the unknown shorts, the class name, NUL padding
     * and the stream ID located <code>field_1_stream_id_offset</code> bytes
     * after the offset field.
     *
     * NOTE(review): no sid/length header is written here, matching the
     * original code; confirm whether callers expect sub-records to emit
     * their own 4 byte header.
     *
     * @param offset index in <code>data</code> at which to start writing.
     * @param data   destination buffer.
     * @return the number of bytes occupied, as reported by {@link #getRecordSize()}.
     */
    public int serialize(int offset, byte[] data)
    {
        int pos = offset;

        LittleEndian.putShort(data, pos, field_1_stream_id_offset); pos += 2;

        // The stream ID offset is measured from the byte after the offset field,
        // i.e. from the current position, and must follow the caller's offset.
        final int streamIdPos = pos + field_1_stream_id_offset;

        LittleEndian.putShortArray(data, pos, field_2_unknown); pos += field_2_unknown.length * 2 + 2;
        LittleEndian.putShort(data, pos, field_3_unicode_len); pos += 2;
        data[pos] = field_4_unicode_flag ? (byte) 0x01 : (byte) 0x00; pos++;

        if ( field_4_unicode_flag )
        {
            StringUtil.putUnicodeLE( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length() * 2;
        }
        else
        {
            StringUtil.putCompressedUnicode( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length();
        }

        // Padded with NUL bytes.  Written explicitly so the result does not
        // depend on the caller handing in a zeroed buffer.
        while (pos < streamIdPos)
        {
            data[pos++] = 0;
        }

        LittleEndian.putInt(data, streamIdPos, field_6_stream_id);

        return getRecordSize();
    }

    /**
     * Size of record (excluding 4 byte header): the 2 byte offset field, the
     * <code>field_1_stream_id_offset</code> bytes leading up to the stream ID,
     * and the 4 byte stream ID.
     */
    public int getRecordSize()
    {
        // Conveniently field 1 stores the length of everything between the
        // offset field and the final int value.
        return 2 + field_1_stream_id_offset + 4;
    }

    /**
     * Gets the stream ID containing the actual data.  The data itself
     * can be found under a top-level directory entry in the OLE2 filesystem
     * under the name "MBD<var>xxxxxxxx</var>" where <var>xxxxxxxx</var> is
     * this ID converted into hex (in big endian order, funnily enough.)
     *
     * @return the data stream ID.
     */
    public int getStreamId()
    {
        return field_6_stream_id;
    }

    /**
     * Builds a multi-line, human readable dump of all fields.
     *
     * @return the textual dump.
     */
    public String toString()
    {
        final String nl = System.getProperty("line.separator");
        StringBuffer buffer = new StringBuffer();

        buffer.append("[ftPictFmla]\n");
        buffer.append("    .streamIdOffset       = ")
            .append("0x").append(HexDump.toHex(  field_1_stream_id_offset ))
            .append(" (").append( field_1_stream_id_offset ).append(" )")
            .append(nl);
        // Only the hex form is printed for the array: appending the short[]
        // itself would emit its identity string rather than its contents.
        // The null check keeps toString() usable on a default-constructed record.
        buffer.append("    .unknown              = ")
            .append("0x").append(field_2_unknown == null ? "null" : HexDump.toHex( field_2_unknown ))
            .append(nl);
        buffer.append("    .unicodeLen           = ")
            .append("0x").append(HexDump.toHex(  field_3_unicode_len ))
            .append(" (").append( field_3_unicode_len ).append(" )")
            .append(nl);
        buffer.append("    .unicodeFlag          = ")
            .append("0x").append( field_4_unicode_flag ? 0x01 : 0x00 )
            .append(" (").append( field_4_unicode_flag ).append(" )")
            .append(nl);
        buffer.append("    .oleClassname         = ")
            .append(field_5_ole_classname)
            .append(nl);
        buffer.append("    .streamId             = ")
            .append("0x").append(HexDump.toHex(  field_6_stream_id ))
            .append(" (").append( field_6_stream_id ).append(" )")
            .append(nl);
        buffer.append("[/ftPictFmla]");
        return buffer.toString();
    }
}

View File

@ -58,6 +58,9 @@ abstract public class SubRecord
case CommonObjectDataSubRecord.sid:
r = new CommonObjectDataSubRecord( in );
break;
case EmbeddedObjectRefSubRecord.sid:
r = new EmbeddedObjectRefSubRecord( in );
break;
case GroupMarkerSubRecord.sid:
r = new GroupMarkerSubRecord( in );
break;

View File

@ -0,0 +1,90 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.usermodel;
import java.io.IOException;
import java.util.Iterator;
import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord;
import org.apache.poi.hssf.record.ObjRecord;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.HexDump;
/**
* Represents binary object (i.e. OLE) data stored in the file. Eg. A GIF, JPEG etc...
*
* @author Daniel Noll
*/
public class HSSFObjectData
{
    /**
     * The low-level OBJ record whose sub-records carry the reference to the object.
     */
    private ObjRecord record;

    /**
     * The filesystem in which the referenced object directory is looked up.
     */
    private POIFSFileSystem poifs;

    /**
     * Wraps a lower level object record together with the filesystem it lives in.
     *
     * @param record the low-level object record.
     * @param poifs the filesystem, required for retrieving the object data.
     */
    public HSSFObjectData(ObjRecord record, POIFSFileSystem poifs)
    {
        this.record = record;
        this.poifs = poifs;
    }

    /**
     * Locates the directory holding the object data. The first
     * {@link EmbeddedObjectRefSubRecord} of the wrapped record supplies the
     * stream ID; the entry named "MBD" followed by that ID in hex is then
     * fetched from the root of the filesystem.
     *
     * @return the object data as an OLE2 directory.
     * @throws IOException if there was an error reading the data, or the
     *         entry found is not a directory.
     * @throws IllegalStateException if the record holds no embedded object reference.
     */
    public DirectoryEntry getDirectory() throws IOException
    {
        for (Iterator it = record.getSubRecords().iterator(); it.hasNext();)
        {
            Object candidate = it.next();
            if (!(candidate instanceof EmbeddedObjectRefSubRecord))
            {
                continue;
            }

            EmbeddedObjectRefSubRecord ref = (EmbeddedObjectRefSubRecord) candidate;
            String streamName = "MBD" + HexDump.toHex(ref.getStreamId());

            Entry entry = poifs.getRoot().getEntry(streamName);
            if (!(entry instanceof DirectoryEntry))
            {
                throw new IOException("Stream " + streamName + " was not an OLE2 directory");
            }
            return (DirectoryEntry) entry;
        }

        throw new IllegalStateException("Object data does not contain a reference to an embedded object OLE2 directory");
    }
}

View File

@ -1332,6 +1332,7 @@ public class HSSFWorkbook
*/
public List getAllPictures()
{
// The drawing group record always exists at the top level, so we won't need to do this recursively.
List pictures = new ArrayList();
Iterator recordIter = workbook.getRecords().iterator();
while (recordIter.hasNext())
@ -1395,6 +1396,50 @@ public class HSSFWorkbook
this.workbook.unwriteProtectWorkbook();
}
/**
 * Collects the embedded OLE2 objects referenced from the records of every
 * sheet in this workbook, visiting the sheets in index order.
 *
 * @return the list of embedded objects (a list of {@link HSSFObjectData} objects.)
 */
public List getAllEmbeddedObjects()
{
    List found = new ArrayList();
    int sheetIndex = 0;
    while (sheetIndex < getNumberOfSheets())
    {
        List sheetRecords = getSheetAt(sheetIndex).getSheet().getRecords();
        getAllEmbeddedObjects(sheetRecords, found);
        sheetIndex++;
    }
    return found;
}
/**
 * Scans a list of records for OBJ records that carry an
 * {@link EmbeddedObjectRefSubRecord} and appends one {@link HSSFObjectData}
 * wrapper per such OBJ record to <code>objects</code>.
 *
 * @param records the list of records to search.
 * @param objects the list of embedded objects to populate.
 */
private void getAllEmbeddedObjects(List records, List objects)
{
    Iterator recordIter = records.iterator();
    while (recordIter.hasNext())
    {
        Object obj = recordIter.next();
        if (!(obj instanceof ObjRecord))
        {
            continue;
        }
        ObjRecord objRecord = (ObjRecord) obj;

        // TODO: More convenient way of determining if there is stored binary.
        // TODO: Link to the data stored in the other stream.
        Iterator subRecordIter = objRecord.getSubRecords().iterator();
        while (subRecordIter.hasNext())
        {
            if (subRecordIter.next() instanceof EmbeddedObjectRefSubRecord)
            {
                objects.add(new HSSFObjectData(objRecord, poifs));
                // The wrapper is built from the OBJ record, not the sub-record,
                // so a second match would only add a duplicate of the same object.
                break;
            }
        }
    }
}
private byte[] newUID()
{
byte[] bytes = new byte[16];

View File

@ -268,6 +268,25 @@ public class HexDump
return retVal.toString();
}
/**
 * Converts the parameter to a hex value.
 *
 * The result is the hex form of each element, separated by ", " and
 * enclosed in square brackets; an empty array yields "[]".
 *
 * @param value The value to convert
 * @return A String representing the array of shorts
 */
public static String toHex(final short[] value)
{
    StringBuffer retVal = new StringBuffer();
    retVal.append('[');
    for(int x = 0; x < value.length; x++)
    {
        // Separator goes between elements only, so no dangling ", " before ']'.
        if (x > 0)
        {
            retVal.append(", ");
        }
        retVal.append(toHex(value[x]));
    }
    retVal.append(']');
    return retVal.toString();
}
/**
* <p>Converts the parameter to a hex value breaking the results into
* lines.</p>

View File

@ -19,6 +19,7 @@ package org.apache.poi.hssf.usermodel;
import java.io.File;
import java.io.FileInputStream;
import java.util.List;
import junit.framework.TestCase;
@ -39,5 +40,22 @@ public class TestOLE2Embeding extends TestCase {
// Check we can get at the Escher layer still
workbook.getAllPictures();
}
/**
 * Loads the sample workbook and checks that both embedded objects are
 * reported and resolve to the expected OLE2 directory names.
 */
public void testEmbeddedObjects() throws Exception {
    String dirname = System.getProperty("HSSF.testdata.path");
    String filename = dirname + "/ole2-embedding.xls";

    File file = new File(filename);
    FileInputStream in = new FileInputStream(file);
    HSSFWorkbook workbook;
    try {
        workbook = new HSSFWorkbook(in);
    } finally {
        // Release the file handle even when the workbook fails to load.
        // NOTE(review): assumes the workbook is fully read by its constructor - confirm.
        in.close();
    }

    List objects = workbook.getAllEmbeddedObjects();
    assertEquals("Wrong number of objects", 2, objects.size());
    assertEquals("Wrong name for first object", "MBD06CAB431",
            ((HSSFObjectData)
            objects.get(0)).getDirectory().getName());
    assertEquals("Wrong name for second object", "MBD06CAC85A",
            ((HSSFObjectData)
            objects.get(1)).getDirectory().getName());
}
}