Patch from bug #44937 from Squeeself - Partial support for extracting Escher images from HWPF files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@658302 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1747a02fa7
commit
2e633955d9
@ -37,6 +37,7 @@
|
||||
|
||||
<!-- Don't forget to update status.xml too! -->
|
||||
<release version="3.1-final" date="2008-06-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">44937 - Partial support for extracting Escher images from HWPF files</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">44824 - Avoid an infinite loop when reading some HWPF pictures</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">44898 - Correctly handle short last blocks in POIFS</action>
|
||||
</release>
|
||||
|
@ -34,6 +34,7 @@
|
||||
<!-- Don't forget to update changes.xml too! -->
|
||||
<changes>
|
||||
<release version="3.1-final" date="2008-06-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">44937 - Partial support for extracting Escher images from HWPF files</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">44824 - Avoid an infinite loop when reading some HWPF pictures</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">44898 - Correctly handle short last blocks in POIFS</action>
|
||||
</release>
|
||||
|
@ -65,20 +65,27 @@ public class EscherClientAnchorRecord
|
||||
int size = 0;
|
||||
|
||||
// Always find 4 two byte entries. Sometimes find 9
|
||||
field_1_flag = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_2_col1 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_3_dx1 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_4_row1 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
if(bytesRemaining >= 18) {
|
||||
field_5_dy1 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_6_col2 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_7_dx2 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_8_row2 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_9_dy2 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
shortRecord = false;
|
||||
} else {
|
||||
shortRecord = true;
|
||||
}
|
||||
if (bytesRemaining == 4) // Word format only 4 bytes
|
||||
{
|
||||
// Not sure exactly what the format is quite yet, likely a reference to a PLC
|
||||
}
|
||||
else
|
||||
{
|
||||
field_1_flag = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_2_col1 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_3_dx1 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_4_row1 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
if(bytesRemaining >= 18) {
|
||||
field_5_dy1 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_6_col2 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_7_dx2 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_8_row2 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
field_9_dy2 = LittleEndian.getShort( data, pos + size ); size += 2;
|
||||
shortRecord = false;
|
||||
} else {
|
||||
shortRecord = true;
|
||||
}
|
||||
}
|
||||
bytesRemaining -= size;
|
||||
remainingData = new byte[bytesRemaining];
|
||||
System.arraycopy( data, pos + size, remainingData, 0, bytesRemaining );
|
||||
|
@ -53,10 +53,10 @@ public class HWPFDocument extends POIDocument
|
||||
protected FileInformationBlock _fib;
|
||||
|
||||
/** main document stream buffer*/
|
||||
private byte[] _mainStream;
|
||||
protected byte[] _mainStream;
|
||||
|
||||
/** table stream buffer*/
|
||||
private byte[] _tableStream;
|
||||
protected byte[] _tableStream;
|
||||
|
||||
/** data stream buffer*/
|
||||
protected byte[] _dataStream;
|
||||
@ -94,6 +94,12 @@ public class HWPFDocument extends POIDocument
|
||||
/** Holds pictures table */
|
||||
protected PicturesTable _pictures;
|
||||
|
||||
/** Holds FSPA (shape) information */
|
||||
protected FSPATable _fspa;
|
||||
|
||||
/** Escher Drawing Group information */
|
||||
protected EscherRecordHolder _dgg;
|
||||
|
||||
protected HWPFDocument()
|
||||
{
|
||||
super(null, null);
|
||||
@ -205,9 +211,6 @@ public class HWPFDocument extends POIDocument
|
||||
_dataStream = new byte[0];
|
||||
}
|
||||
|
||||
// read in the pictures stream
|
||||
_pictures = new PicturesTable(this, _dataStream);
|
||||
|
||||
// get the start of text in the main stream
|
||||
int fcMin = _fib.getFcMin();
|
||||
|
||||
@ -227,6 +230,20 @@ public class HWPFDocument extends POIDocument
|
||||
_pbt.adjustForDelete(0, 0, cpMin);
|
||||
}
|
||||
|
||||
// Read FSPA and Escher information
|
||||
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
|
||||
|
||||
if (_fib.getFcDggInfo() != 0)
|
||||
{
|
||||
_dgg = new EscherRecordHolder(_tableStream, _fib.getFcDggInfo(), _fib.getLcbDggInfo());
|
||||
} else
|
||||
{
|
||||
_dgg = new EscherRecordHolder();
|
||||
}
|
||||
|
||||
// read in the pictures stream
|
||||
_pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg);
|
||||
|
||||
_st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, getTextTable().getTextPieces());
|
||||
_ss = new StyleSheet(_tableStream, _fib.getFcStshf());
|
||||
_ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());
|
||||
|
@ -0,0 +1,116 @@
|
||||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
|
||||
package org.apache.poi.hwpf.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import org.apache.poi.ddf.DefaultEscherRecordFactory;
|
||||
import org.apache.poi.ddf.EscherContainerRecord;
|
||||
import org.apache.poi.ddf.EscherRecord;
|
||||
import org.apache.poi.ddf.EscherRecordFactory;
|
||||
|
||||
/**
|
||||
* Based on AbstractEscherRecordHolder fomr HSSF.
|
||||
*
|
||||
* @author Squeeself
|
||||
*/
|
||||
public class EscherRecordHolder
|
||||
{
|
||||
protected ArrayList escherRecords = new ArrayList();
|
||||
|
||||
public EscherRecordHolder()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
public EscherRecordHolder(byte[] data, int offset, int size)
|
||||
{
|
||||
fillEscherRecords(data, offset, size);
|
||||
}
|
||||
|
||||
private void fillEscherRecords(byte[] data, int offset, int size)
|
||||
{
|
||||
EscherRecordFactory recordFactory = new DefaultEscherRecordFactory();
|
||||
int pos = offset;
|
||||
while ( pos < offset + size)
|
||||
{
|
||||
EscherRecord r = recordFactory.createRecord(data, pos);
|
||||
escherRecords.add(r);
|
||||
int bytesRead = r.fillFields(data, pos, recordFactory);
|
||||
pos += bytesRead + 1; // There is an empty byte between each top-level record in a Word doc
|
||||
}
|
||||
}
|
||||
|
||||
public List getEscherRecords()
|
||||
{
|
||||
return escherRecords;
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
|
||||
final String nl = System.getProperty("line.separator");
|
||||
if (escherRecords.size() == 0)
|
||||
buffer.append("No Escher Records Decoded" + nl);
|
||||
for ( Iterator iterator = escherRecords.iterator(); iterator.hasNext(); )
|
||||
{
|
||||
EscherRecord r = (EscherRecord) iterator.next();
|
||||
buffer.append(r.toString());
|
||||
}
|
||||
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* If we have a EscherContainerRecord as one of our
|
||||
* children (and most top level escher holders do),
|
||||
* then return that.
|
||||
*/
|
||||
public EscherContainerRecord getEscherContainer() {
|
||||
for(Iterator it = escherRecords.iterator(); it.hasNext();) {
|
||||
Object er = it.next();
|
||||
if(er instanceof EscherContainerRecord) {
|
||||
return (EscherContainerRecord)er;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Descends into all our children, returning the
|
||||
* first EscherRecord with the given id, or null
|
||||
* if none found
|
||||
*/
|
||||
public EscherRecord findFirstWithId(short id) {
|
||||
return findFirstWithId(id, getEscherRecords());
|
||||
}
|
||||
private EscherRecord findFirstWithId(short id, List records) {
|
||||
// Check at our level
|
||||
for(Iterator it = records.iterator(); it.hasNext();) {
|
||||
EscherRecord r = (EscherRecord)it.next();
|
||||
if(r.getRecordId() == id) {
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
// Then check our children in turn
|
||||
for(Iterator it = records.iterator(); it.hasNext();) {
|
||||
EscherRecord r = (EscherRecord)it.next();
|
||||
if(r.isContainerRecord()) {
|
||||
EscherRecord found =
|
||||
findFirstWithId(id, r.getChildRecords());
|
||||
if(found != null) {
|
||||
return found;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Not found in this lot
|
||||
return null;
|
||||
}
|
||||
}
|
182
src/scratchpad/src/org/apache/poi/hwpf/model/FSPA.java
Normal file
182
src/scratchpad/src/org/apache/poi/hwpf/model/FSPA.java
Normal file
@ -0,0 +1,182 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hwpf.model;
|
||||
|
||||
import org.apache.poi.util.BitField;
|
||||
import org.apache.poi.util.BitFieldFactory;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
/**
|
||||
* File Shape Address structure
|
||||
*
|
||||
* @author Squeeself
|
||||
*/
|
||||
public class FSPA
|
||||
{
|
||||
public static final int FSPA_SIZE = 26;
|
||||
private int spid; // Shape identifier. Used to get data position
|
||||
private int xaLeft; // Enclosing rectangle
|
||||
private int yaTop; // Enclosing rectangle
|
||||
private int xaRight; // Enclosing rectangle
|
||||
private int yaBottom; // Enclosing rectangle
|
||||
private short options;
|
||||
private static BitField fHdr = BitFieldFactory.getInstance(0x0001); // 1 in undo when in header
|
||||
private static BitField bx = BitFieldFactory.getInstance(0x0006); // x pos relative to anchor CP: 0 - page margin, 1 - top of page, 2 - text, 3 - reserved
|
||||
private static BitField by = BitFieldFactory.getInstance(0x0018); // y pos relative to anchor CP: ditto
|
||||
private static BitField wr = BitFieldFactory.getInstance(0x01E0); // Text wrapping mode: 0 - like 2 w/o absolute, 1 - no text next to shape, 2 - wrap around absolute object, 3 - wrap as if no object, 4 - wrap tightly around object, 5 - wrap tightly, allow holes, 6-15 - reserved
|
||||
private static BitField wrk = BitFieldFactory.getInstance(0x1E00); // Text wrapping mode type (for modes 2&4): 0 - wrap both sides, 1 - wrap only left, 2 - wrap only right, 3 - wrap largest side
|
||||
private static BitField fRcaSimple = BitFieldFactory.getInstance(0x2000); // Overwrites bx if set, forcing rectangle to be page relative
|
||||
private static BitField fBelowText = BitFieldFactory.getInstance(0x4000); // if true, shape is below text, otherwise above
|
||||
private static BitField fAnchorLock = BitFieldFactory.getInstance(0x8000); // if true, anchor is locked
|
||||
private int cTxbx; // Count of textboxes in shape (undo doc only)
|
||||
|
||||
public FSPA()
|
||||
{
|
||||
}
|
||||
|
||||
public FSPA(byte[] bytes, int offset)
|
||||
{
|
||||
spid = LittleEndian.getInt(bytes, offset);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
xaLeft = LittleEndian.getInt(bytes, offset);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
yaTop = LittleEndian.getInt(bytes, offset);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
xaRight = LittleEndian.getInt(bytes, offset);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
yaBottom = LittleEndian.getInt(bytes, offset);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
options = LittleEndian.getShort(bytes, offset);
|
||||
offset += LittleEndian.SHORT_SIZE;
|
||||
cTxbx = LittleEndian.getInt(bytes, offset);
|
||||
}
|
||||
|
||||
public int getSpid()
|
||||
{
|
||||
return spid;
|
||||
}
|
||||
|
||||
public int getXaLeft()
|
||||
{
|
||||
return xaLeft;
|
||||
}
|
||||
|
||||
public int getYaTop()
|
||||
{
|
||||
return yaTop;
|
||||
}
|
||||
|
||||
public int getXaRight()
|
||||
{
|
||||
return xaRight;
|
||||
}
|
||||
|
||||
public int getYaBottom()
|
||||
{
|
||||
return yaBottom;
|
||||
}
|
||||
|
||||
public boolean isFHdr()
|
||||
{
|
||||
return fHdr.isSet(options);
|
||||
}
|
||||
|
||||
public short getBx()
|
||||
{
|
||||
return bx.getShortValue(options);
|
||||
}
|
||||
|
||||
public short getBy()
|
||||
{
|
||||
return by.getShortValue(options);
|
||||
}
|
||||
|
||||
public short getWr()
|
||||
{
|
||||
return wr.getShortValue(options);
|
||||
}
|
||||
|
||||
public short getWrk()
|
||||
{
|
||||
return wrk.getShortValue(options);
|
||||
}
|
||||
|
||||
public boolean isFRcaSimple()
|
||||
{
|
||||
return fRcaSimple.isSet(options);
|
||||
}
|
||||
|
||||
public boolean isFBelowText()
|
||||
{
|
||||
return fBelowText.isSet(options);
|
||||
}
|
||||
|
||||
public boolean isFAnchorLock()
|
||||
{
|
||||
return fAnchorLock.isSet(options);
|
||||
}
|
||||
|
||||
public int getCTxbx()
|
||||
{
|
||||
return cTxbx;
|
||||
}
|
||||
|
||||
public byte[] toByteArray()
|
||||
{
|
||||
int offset = 0;
|
||||
byte[] buf = new byte[FSPA_SIZE];
|
||||
|
||||
LittleEndian.putInt(buf, offset, spid);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
LittleEndian.putInt(buf, offset, xaLeft);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
LittleEndian.putInt(buf, offset, yaTop);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
LittleEndian.putInt(buf, offset, xaRight);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
LittleEndian.putInt(buf, offset, yaBottom);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
LittleEndian.putShort(buf, offset, options);
|
||||
offset += LittleEndian.SHORT_SIZE;
|
||||
LittleEndian.putInt(buf, offset, cTxbx);
|
||||
offset += LittleEndian.INT_SIZE;
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
StringBuffer buf = new StringBuffer();
|
||||
buf.append("spid: ").append(spid);
|
||||
buf.append(", xaLeft: ").append(xaLeft);
|
||||
buf.append(", yaTop: ").append(yaTop);
|
||||
buf.append(", xaRight: ").append(xaRight);
|
||||
buf.append(", yaBottom: ").append(yaBottom);
|
||||
buf.append(", options: ").append(options);
|
||||
buf.append(" (fHdr: ").append(isFHdr());
|
||||
buf.append(", bx: ").append(getBx());
|
||||
buf.append(", by: ").append(getBy());
|
||||
buf.append(", wr: ").append(getWr());
|
||||
buf.append(", wrk: ").append(getWrk());
|
||||
buf.append(", fRcaSimple: ").append(isFRcaSimple());
|
||||
buf.append(", fBelowText: ").append(isFBelowText());
|
||||
buf.append(", fAnchorLock: ").append(isFAnchorLock());
|
||||
buf.append("), cTxbx: ").append(cTxbx);
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
82
src/scratchpad/src/org/apache/poi/hwpf/model/FSPATable.java
Normal file
82
src/scratchpad/src/org/apache/poi/hwpf/model/FSPATable.java
Normal file
@ -0,0 +1,82 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hwpf.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* This class holds all the FSPA (File Shape Address) structures.
|
||||
*
|
||||
* @author Squeeself
|
||||
*/
|
||||
public class FSPATable
|
||||
{
|
||||
protected ArrayList shapes = new ArrayList();
|
||||
protected HashMap cps = new HashMap();
|
||||
protected List _text;
|
||||
|
||||
public FSPATable(byte[] tableStream, int fcPlcspa, int lcbPlcspa, List tpt)
|
||||
{
|
||||
_text = tpt;
|
||||
// Will be 0 if no drawing objects in document
|
||||
if (fcPlcspa == 0)
|
||||
return;
|
||||
|
||||
PlexOfCps plex = new PlexOfCps(tableStream, fcPlcspa, lcbPlcspa, FSPA.FSPA_SIZE);
|
||||
for (int i=0; i < plex.length(); i++)
|
||||
{
|
||||
GenericPropertyNode property = plex.getProperty(i);
|
||||
FSPA fspa = new FSPA(property.getBytes(), 0);
|
||||
|
||||
shapes.add(fspa);
|
||||
cps.put(Integer.valueOf(property.getStart()), Integer.valueOf(i));
|
||||
}
|
||||
}
|
||||
|
||||
public FSPA getFspaFromCp(int cp)
|
||||
{
|
||||
Integer idx = (Integer)cps.get(Integer.valueOf(cp));
|
||||
if (idx == null)
|
||||
return null;
|
||||
return (FSPA)shapes.get(idx.intValue());
|
||||
}
|
||||
|
||||
public List getShapes()
|
||||
{
|
||||
return shapes;
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
StringBuffer buf = new StringBuffer();
|
||||
buf.append("[FPSA PLC size=").append(shapes.size()).append("]\n");
|
||||
for (Iterator it = cps.keySet().iterator(); it.hasNext(); )
|
||||
{
|
||||
Integer i = (Integer) it.next();
|
||||
FSPA fspa = (FSPA) shapes.get(((Integer)cps.get(i)).intValue());
|
||||
buf.append(" [FC: ").append(i.toString()).append("] ");
|
||||
buf.append(fspa.toString());
|
||||
buf.append("\n");
|
||||
}
|
||||
buf.append("[/FSPA PLC]");
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
@ -309,6 +309,26 @@ public class FileInformationBlock extends FIBAbstractType
|
||||
return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDMOM);
|
||||
}
|
||||
|
||||
public int getFcPlcspaMom()
|
||||
{
|
||||
return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCSPAMOM);
|
||||
}
|
||||
|
||||
public int getLcbPlcspaMom()
|
||||
{
|
||||
return _fieldHandler.getFieldSize(FIBFieldHandler.PLCSPAMOM);
|
||||
}
|
||||
|
||||
public int getFcDggInfo()
|
||||
{
|
||||
return _fieldHandler.getFieldOffset(FIBFieldHandler.DGGINFO);
|
||||
}
|
||||
|
||||
public int getLcbDggInfo()
|
||||
{
|
||||
return _fieldHandler.getFieldSize(FIBFieldHandler.DGGINFO);
|
||||
}
|
||||
|
||||
public void writeTo (byte[] mainStream, HWPFOutputStream tableStream)
|
||||
throws IOException
|
||||
{
|
||||
|
@ -26,7 +26,12 @@ import org.apache.poi.hwpf.usermodel.Range;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import java.util.Iterator;
|
||||
import org.apache.poi.ddf.DefaultEscherRecordFactory;
|
||||
import org.apache.poi.ddf.EscherBSERecord;
|
||||
import org.apache.poi.ddf.EscherBlipRecord;
|
||||
import org.apache.poi.ddf.EscherRecord;
|
||||
import org.apache.poi.ddf.EscherRecordFactory;
|
||||
|
||||
/**
|
||||
* Holds information about all pictures embedded in Word Document either via "Insert -> Picture -> From File" or via
|
||||
@ -57,6 +62,9 @@ public class PicturesTable
|
||||
|
||||
private HWPFDocument _document;
|
||||
private byte[] _dataStream;
|
||||
private byte[] _mainStream;
|
||||
private FSPATable _fspa;
|
||||
private EscherRecordHolder _dgg;
|
||||
|
||||
/** @link dependency
|
||||
* @stereotype instantiate*/
|
||||
@ -67,10 +75,13 @@ public class PicturesTable
|
||||
* @param document
|
||||
* @param _dataStream
|
||||
*/
|
||||
public PicturesTable(HWPFDocument _document, byte[] _dataStream)
|
||||
public PicturesTable(HWPFDocument _document, byte[] _dataStream, byte[] _mainStream, FSPATable fspa, EscherRecordHolder dgg)
|
||||
{
|
||||
this._document = _document;
|
||||
this._document = _document;
|
||||
this._dataStream = _dataStream;
|
||||
this._mainStream = _mainStream;
|
||||
this._fspa = fspa;
|
||||
this._dgg = dgg;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -84,6 +95,13 @@ public class PicturesTable
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean hasEscherPicture(CharacterRun run) {
|
||||
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && run.text().startsWith("\u0008")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* determines whether specified CharacterRun contains reference to a picture
|
||||
* @param run
|
||||
@ -123,6 +141,46 @@ public class PicturesTable
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a recursive search for pictures in the given list of escher records.
|
||||
*
|
||||
* @param escherRecords the escher records.
|
||||
* @param pictures the list to populate with the pictures.
|
||||
*/
|
||||
private void searchForPictures(List escherRecords, List pictures)
|
||||
{
|
||||
Iterator recordIter = escherRecords.iterator();
|
||||
while (recordIter.hasNext())
|
||||
{
|
||||
Object obj = recordIter.next();
|
||||
if (obj instanceof EscherRecord)
|
||||
{
|
||||
EscherRecord escherRecord = (EscherRecord) obj;
|
||||
|
||||
if (escherRecord instanceof EscherBSERecord)
|
||||
{
|
||||
EscherBSERecord bse = (EscherBSERecord) escherRecord;
|
||||
EscherBlipRecord blip = bse.getBlipRecord();
|
||||
if (blip != null)
|
||||
{
|
||||
pictures.add(new Picture(blip.getPicturedata()));
|
||||
}
|
||||
else if (bse.getOffset() > 0)
|
||||
{
|
||||
// Blip stored in delay stream, which in a word doc, is the main stream
|
||||
EscherRecordFactory recordFactory = new DefaultEscherRecordFactory();
|
||||
blip = (EscherBlipRecord) recordFactory.createRecord(_mainStream, bse.getOffset());
|
||||
blip.fillFields(_mainStream, bse.getOffset(), recordFactory);
|
||||
pictures.add(new Picture(blip.getPicturedata()));
|
||||
}
|
||||
}
|
||||
|
||||
// Recursive call.
|
||||
searchForPictures(escherRecord.getChildRecords(), pictures);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Not all documents have all the images concatenated in the data stream
|
||||
* although MS claims so. The best approach is to scan all character runs.
|
||||
@ -136,13 +194,14 @@ public class PicturesTable
|
||||
for (int i = 0; i < range.numCharacterRuns(); i++) {
|
||||
CharacterRun run = range.getCharacterRun(i);
|
||||
String text = run.text();
|
||||
int j = text.charAt(0);
|
||||
Picture picture = extractPicture(run, false);
|
||||
if (picture != null) {
|
||||
pictures.add(picture);
|
||||
}
|
||||
}
|
||||
|
||||
searchForPictures(_dgg.getEscherRecords(), pictures);
|
||||
|
||||
return pictures;
|
||||
}
|
||||
|
||||
|
@ -99,6 +99,15 @@ public class Picture
|
||||
}
|
||||
}
|
||||
|
||||
public Picture(byte[] _dataStream)
|
||||
{
|
||||
this._dataStream = _dataStream;
|
||||
this.dataBlockStartOfsset = 0;
|
||||
this.dataBlockSize = _dataStream.length;
|
||||
this.pictureBytesStartOffset = 0;
|
||||
this.size = _dataStream.length;
|
||||
}
|
||||
|
||||
private void fillWidthHeight()
|
||||
{
|
||||
String ext = suggestFileExtension();
|
||||
|
@ -35,10 +35,12 @@ public class TestHWPFPictures extends TestCase {
|
||||
private String docAFile;
|
||||
private String docBFile;
|
||||
private String docCFile;
|
||||
private String docDFile;
|
||||
|
||||
private String imgAFile;
|
||||
private String imgBFile;
|
||||
private String imgCFile;
|
||||
private String imgDFile;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
String dirname = System.getProperty("HWPF.testdata.path");
|
||||
@ -46,10 +48,12 @@ public class TestHWPFPictures extends TestCase {
|
||||
docAFile = dirname + "/testPictures.doc";
|
||||
docBFile = dirname + "/two_images.doc";
|
||||
docCFile = dirname + "/vector_image.doc";
|
||||
docDFile = dirname + "/GaiaTest.doc";
|
||||
|
||||
imgAFile = dirname + "/simple_image.jpg";
|
||||
imgBFile = dirname + "/simple_image.png";
|
||||
imgCFile = dirname + "/vector_image.emf";
|
||||
imgDFile = dirname + "/GaiaTestImg.png";
|
||||
}
|
||||
|
||||
/**
|
||||
@ -127,6 +131,25 @@ public class TestHWPFPictures extends TestCase {
|
||||
assertBytesSame(picBytes, pic.getContent());
|
||||
}
|
||||
|
||||
/**
|
||||
* Pending the missing files being uploaded to
|
||||
* bug #44937
|
||||
*/
|
||||
public void BROKENtestEscherDrawing() throws Exception
|
||||
{
|
||||
HWPFDocument docD = new HWPFDocument(new FileInputStream(docDFile));
|
||||
List allPictures = docD.getPicturesTable().getAllPictures();
|
||||
|
||||
assertEquals(1, allPictures.size());
|
||||
|
||||
Picture pic = (Picture) allPictures.get(0);
|
||||
assertNotNull(pic);
|
||||
byte[] picD = readFile(imgDFile);
|
||||
|
||||
assertEquals(picD.length, pic.getContent().length);
|
||||
|
||||
assertBytesSame(picD, pic.getContent());
|
||||
}
|
||||
|
||||
private void assertBytesSame(byte[] a, byte[] b) {
|
||||
assertEquals(a.length, b.length);
|
||||
|
Loading…
Reference in New Issue
Block a user