Support for getting OLE object data from slide show

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@648674 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2008-04-16 11:57:15 +00:00
parent e52de74f2d
commit 1cd1e8546f
12 changed files with 412 additions and 25 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.0.3-beta1" date="2008-04-??"> <release version="3.0.3-beta1" date="2008-04-??">
<action dev="POI-DEVELOPERS" type="add">HSLF: Support for getting OLE object data from slide show </action>
<action dev="POI-DEVELOPERS" type="add">HSLF: Implemented more methods in PPGraphics2D</action> <action dev="POI-DEVELOPERS" type="add">HSLF: Implemented more methods in PPGraphics2D</action>
<action dev="POI-DEVELOPERS" type="add">HSLF: Added Freeform shape which can contain both lines and Bezier curves</action> <action dev="POI-DEVELOPERS" type="add">HSLF: Added Freeform shape which can contain both lines and Bezier curves</action>
<action dev="POI-DEVELOPERS" type="fix">41071 - Improved text extraction in HSLF</action> <action dev="POI-DEVELOPERS" type="fix">41071 - Improved text extraction in HSLF</action>

View File

@ -41,6 +41,9 @@
<li><link href="#Hyperlinks">Hyperlinks</link></li> <li><link href="#Hyperlinks">Hyperlinks</link></li>
<li><link href="#Tables">Tables</link></li> <li><link href="#Tables">Tables</link></li>
<li><link href="#RemoveShape">How to remove shapes</link></li> <li><link href="#RemoveShape">How to remove shapes</link></li>
<li><link href="#OLE">How to retrieve embedded OLE objects</link></li>
<li><link href="#Freeform">How to create shapes of arbitrary geometry</link></li>
<li><link href="#Graphics2D">Shapes and Graphics2D</link></li>
</ul> </ul>
</section> </section>
<section><title>Features</title> <section><title>Features</title>
@ -81,14 +84,8 @@
</section> </section>
<anchor id="GetShapes"/> <anchor id="GetShapes"/>
<section><title>How to get shapes contained in a particular slide</title> <section><title>How to get shapes contained in a particular slide</title>
<p>The superclass of all shapes in HSLF is the Shape class - the elemental object that composes a drawing.
The following pictute shows the class tree of HSLF shapes:
</p>
<p> <p>
<img src="images/hslf_shapes.gif" alt="Class Tree of HSLF Shapes" width="611" height="412"/> The following code demonstrates how to iterate over shapes for each slide.
</p>
<p>
The following fragment demonstrates how to iterate over shapes for each slide.
</p> </p>
<source> <source>
SlideShow ppt = new SlideShow(new HSLFSlideShow("slideshow.ppt")); SlideShow ppt = new SlideShow(new HSLFSlideShow("slideshow.ppt"));
@ -456,8 +453,104 @@
} }
</source> </source>
</section> </section>
<anchor id="OLE"/>
<section><title>How to retrieve embedded OLE objects</title>
<source>
</section> Shape[] shape = slide.getShapes();
for (int i = 0; i &lt; shape.length; i++) {
if (shape[i] instanceof OLEShape) {
OLEShape ole = (OLEShape) shape[i];
ObjectData data = ole.getObjectData();
String name = ole.getInstanceName();
if ("Worksheet".equals(name)) {
HSSFWorkbook wb = new HSSFWorkbook(data.getData());
} else if ("Document".equals(name)) {
HWPFDocument doc = new HWPFDocument(data.getData());
}
}
}
</source>
</section>
<anchor id="Freeform"/>
<section><title>How to create shapes of arbitrary geometry</title>
<source>
SlideShow ppt = new SlideShow();
Slide slide = ppt.createSlide();
java.awt.geom.GeneralPath path = new java.awt.geom.GeneralPath();
path.moveTo(100, 100);
path.lineTo(200, 100);
path.curveTo(50, 45, 134, 22, 78, 133);
path.curveTo(10, 45, 134, 56, 78, 100);
path.lineTo(100, 200);
path.closePath();
Freeform shape = new Freeform();
shape.setPath(path);
slide.addShape(shape);
</source>
</section>
<anchor id="Graphics2D"/>
<section><title>How to draw into a slide using Graphics2D</title>
<warning>
The current implementation of the PowerPoint Graphics2D driver is not fully compliant with the java.awt.Graphics2D specification.
Some features, such as clipping and drawing of images, are not yet supported.
</warning>
<source>
SlideShow ppt = new SlideShow();
Slide slide = ppt.createSlide();
//draw a simple bar graph
//bar chart data. The first value is the bar color, the second is the width
Object[] def = new Object[]{
Color.yellow, new Integer(100),
Color.green, new Integer(150),
Color.gray, new Integer(75),
Color.red, new Integer(200),
};
//all objects are drawn into a shape group so we need to create one
ShapeGroup group = new ShapeGroup();
//define position of the drawing in the slide
Rectangle bounds = new java.awt.Rectangle(200, 100, 350, 300);
//if you want to draw in the entire slide area then define the anchor as follows:
//Dimension pgsize = ppt.getPageSize();
//java.awt.Rectangle bounds = new java.awt.Rectangle(0, 0, pgsize.width, pgsize.height);
group.setAnchor(bounds);
slide.addShape(group);
//draw a simple bar chart
Graphics2D graphics = new PPGraphics2D(group);
int x = bounds.x + 50, y = bounds.y + 50;
graphics.setFont(new Font("Arial", Font.BOLD, 10));
for (int i = 0, idx = 1; i &lt; def.length; i+=2, idx++) {
graphics.setColor(Color.black);
int width = ((Integer)def[i+1]).intValue();
graphics.drawString("Q" + idx, x-20, y+20);
graphics.drawString(width + "%", x + width + 10, y + 20);
graphics.setColor((Color)def[i]);
graphics.fill(new Rectangle(x, y, width, 30));
y += 40;
}
graphics.setColor(Color.black);
graphics.setFont(new Font("Arial", Font.BOLD, 14));
graphics.draw(bounds);
graphics.drawString("Performance", x + 70, y + 40);
FileOutputStream out = new FileOutputStream("hslf-graphics2d.ppt");
ppt.write(out);
out.close();
</source>
</section>
</section>
</section> </section>
</body> </body>
</document> </document>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.0.3-beta1" date="2008-04-??"> <release version="3.0.3-beta1" date="2008-04-??">
<action dev="POI-DEVELOPERS" type="add">HSLF: Support for getting OLE object data from slide show </action>
<action dev="POI-DEVELOPERS" type="add">HSLF: Implemented more methods in PPGraphics2D</action> <action dev="POI-DEVELOPERS" type="add">HSLF: Implemented more methods in PPGraphics2D</action>
<action dev="POI-DEVELOPERS" type="add">HSLF: Added Freeform shape which can contain both lines and Bezier curves</action> <action dev="POI-DEVELOPERS" type="add">HSLF: Added Freeform shape which can contain both lines and Bezier curves</action>
<action dev="POI-DEVELOPERS" type="fix">41071 - Improved text extraction in HSLF</action> <action dev="POI-DEVELOPERS" type="fix">41071 - Improved text extraction in HSLF</action>

View File

@ -27,22 +27,13 @@ import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.ArrayList; import java.util.*;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.POIDocument; import org.apache.poi.POIDocument;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException; import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException; import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException; import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.record.CurrentUserAtom; import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.record.ExOleObjStg;
import org.apache.poi.hslf.record.PersistPtrHolder;
import org.apache.poi.hslf.record.PositionDependentRecord;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.UserEditAtom;
import org.apache.poi.hslf.usermodel.ObjectData; import org.apache.poi.hslf.usermodel.ObjectData;
import org.apache.poi.hslf.usermodel.PictureData; import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.hslf.model.Shape; import org.apache.poi.hslf.model.Shape;
@ -253,6 +244,7 @@ public class HSLFSlideShow extends POIDocument
private Record[] read(byte[] docstream, int usrOffset){ private Record[] read(byte[] docstream, int usrOffset){
ArrayList lst = new ArrayList(); ArrayList lst = new ArrayList();
HashMap offset2id = new HashMap();
while (usrOffset != 0){ while (usrOffset != 0){
UserEditAtom usr = (UserEditAtom) Record.buildRecordAtOffset(docstream, usrOffset); UserEditAtom usr = (UserEditAtom) Record.buildRecordAtOffset(docstream, usrOffset);
lst.add(new Integer(usrOffset)); lst.add(new Integer(usrOffset));
@ -266,6 +258,7 @@ public class HSLFSlideShow extends POIDocument
Integer offset = (Integer)entries.get(id); Integer offset = (Integer)entries.get(id);
lst.add(offset); lst.add(offset);
offset2id.put(offset, id);
} }
usrOffset = usr.getLastUserEditAtomOffset(); usrOffset = usr.getLastUserEditAtomOffset();
@ -278,6 +271,11 @@ public class HSLFSlideShow extends POIDocument
for (int i = 0; i < a.length; i++) { for (int i = 0; i < a.length; i++) {
Integer offset = (Integer)a[i]; Integer offset = (Integer)a[i];
rec[i] = (Record)Record.buildRecordAtOffset(docstream, offset.intValue()); rec[i] = (Record)Record.buildRecordAtOffset(docstream, offset.intValue());
if(rec[i] instanceof PersistRecord) {
PersistRecord psr = (PersistRecord)rec[i];
Integer id = (Integer)offset2id.get(offset);
psr.setPersistId(id.intValue());
}
} }
return rec; return rec;

View File

@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.poi.hslf.model;
import org.apache.poi.ddf.*;
import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.hslf.usermodel.ObjectData;
import org.apache.poi.hslf.record.Document;
import org.apache.poi.hslf.record.ExObjList;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.ExEmbed;
import org.apache.poi.hslf.blip.Bitmap;
import org.apache.poi.util.POILogger;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.awt.*;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.List;
import java.util.Arrays;
/**
 * A shape representing an embedded OLE object.
 *
 * <p>
 * The shape is tied to its <code>ExEmbed</code> record through the value of the
 * <code>BLIP__PICTUREID</code> escher property, and the <code>ExEmbed</code> record in turn
 * references the stored object data by persist id.
 * </p>
 *
 * @author Yegor Kozlov
 */
public class OLEShape extends Picture {
    /**
     * Lazily resolved record container for this embedded object,
     * <code>null</code> until {@link #getExEmbed()} finds a match.
     */
    protected ExEmbed _exEmbed;

    /**
     * Create a new <code>OLEShape</code>
     *
     * @param idx the index of the picture
     */
    public OLEShape(int idx){
        super(idx);
    }

    /**
     * Create a new <code>OLEShape</code>
     *
     * @param idx the index of the picture
     * @param parent the parent shape
     */
    public OLEShape(int idx, Shape parent) {
        super(idx, parent);
    }

    /**
     * Create a <code>OLEShape</code> object
     *
     * @param escherRecord the <code>EscherSpContainer</code> record which holds information about
     *        this picture in the <code>Slide</code>
     * @param parent the parent shape of this picture
     */
    protected OLEShape(EscherContainerRecord escherRecord, Shape parent){
        super(escherRecord, parent);
    }

    /**
     * Returns unique identifier for the OLE object.
     *
     * @return the unique identifier for the OLE object
     */
    public int getObjectID(){
        return getEscherProperty(EscherProperties.BLIP__PICTUREID);
    }

    /**
     * Returns the data of the embedded OLE object.
     *
     * <p>
     * The data is located by comparing the persist id of every embedded object in the
     * slide show with the storage reference held by this shape's <code>ExEmbed</code> record.
     * </p>
     *
     * @return the object data, or <code>null</code> (after logging a warning) if the
     *         <code>ExEmbed</code> record or the matching data cannot be found
     */
    public ObjectData getObjectData(){
        // getExEmbed() returns null when the document has no matching record;
        // without this guard the lookup below would fail with a NullPointerException
        ExEmbed exEmbed = getExEmbed();
        if(exEmbed == null){
            logger.log(POILogger.WARN, "OLE data not found");
            return null;
        }

        //persist reference
        int ref = exEmbed.getExOleObjAtom().getObjStgDataRef();

        SlideShow ppt = getSheet().getSlideShow();
        ObjectData[] ole = ppt.getEmbeddedObjects();
        for (int i = 0; i < ole.length; i++) {
            if(ole[i].getExOleObjStg().getPersistId() == ref) return ole[i];
        }

        logger.log(POILogger.WARN, "OLE data not found");
        return null;
    }

    /**
     * Return the record container for this embedded object.
     *
     * <p>
     * It contains:
     * 1. ExEmbedAtom.(4045)
     * 2. ExOleObjAtom (4035)
     * 3. CString (4026), Instance MenuName (1) used for menus and the Links dialog box.
     * 4. CString (4026), Instance ProgID (2) that stores the OLE Programmatic Identifier.
     *    A ProgID is a string that uniquely identifies a given object.
     * 5. CString (4026), Instance ClipboardName (3) that appears in the paste special dialog.
     * 6. MetaFile( 4033), optional
     * </p>
     *
     * @return the <code>ExEmbed</code> record whose object id equals {@link #getObjectID()},
     *         or <code>null</code> if the document has no <code>ExObjList</code> or no
     *         matching <code>ExEmbed</code> child
     */
    public ExEmbed getExEmbed(){
        if(_exEmbed == null){
            SlideShow ppt = getSheet().getSlideShow();

            ExObjList lst = ppt.getDocumentRecord().getExObjList();
            if(lst == null){
                logger.log(POILogger.WARN, "ExObjList not found");
                return null;
            }

            int id = getObjectID();
            Record[] ch = lst.getChildRecords();
            for (int i = 0; i < ch.length; i++) {
                if(ch[i] instanceof ExEmbed){
                    ExEmbed embd = (ExEmbed)ch[i];
                    if( embd.getExOleObjAtom().getObjID() == id) _exEmbed = embd;
                }
            }
        }
        return _exEmbed;
    }

    /**
     * Returns the instance name of the embedded object, e.g. "Document" or "Worksheet".
     *
     * @return the instance name of the embedded object,
     *         or <code>null</code> if the <code>ExEmbed</code> record cannot be found
     */
    public String getInstanceName(){
        ExEmbed exEmbed = getExEmbed();
        return exEmbed == null ? null : exEmbed.getMenuName();
    }

    /**
     * Returns the full name of the embedded object,
     * e.g. "Microsoft Word Document" or "Microsoft Office Excel Worksheet".
     *
     * @return the full name of the embedded object,
     *         or <code>null</code> if the <code>ExEmbed</code> record cannot be found
     */
    public String getFullName(){
        ExEmbed exEmbed = getExEmbed();
        return exEmbed == null ? null : exEmbed.getClipboardName();
    }

    /**
     * Returns the ProgID that stores the OLE Programmatic Identifier.
     * A ProgID is a string that uniquely identifies a given object, for example,
     * "Word.Document.8" or "Excel.Sheet.8".
     *
     * @return the ProgID,
     *         or <code>null</code> if the <code>ExEmbed</code> record cannot be found
     */
    public String getProgID(){
        ExEmbed exEmbed = getExEmbed();
        return exEmbed == null ? null : exEmbed.getProgId();
    }
}

View File

@ -294,7 +294,7 @@ public abstract class Shape {
public int getEscherProperty(short propId){ public int getEscherProperty(short propId){
EscherOptRecord opt = (EscherOptRecord)getEscherChild(_escherContainer, EscherOptRecord.RECORD_ID); EscherOptRecord opt = (EscherOptRecord)getEscherChild(_escherContainer, EscherOptRecord.RECORD_ID);
EscherSimpleProperty prop = (EscherSimpleProperty)getEscherProperty(opt, propId); EscherSimpleProperty prop = (EscherSimpleProperty)getEscherProperty(opt, propId);
return prop == null ? 0 : prop.getPropertyNumber(); return prop == null ? 0 : prop.getPropertyValue();
} }
/** /**

View File

@ -76,13 +76,19 @@ public class ShapeFactory {
case ShapeTypes.TextBox: case ShapeTypes.TextBox:
shape = new TextBox(spContainer, parent); shape = new TextBox(spContainer, parent);
break; break;
case ShapeTypes.PictureFrame: case ShapeTypes.PictureFrame: {
shape = new Picture(spContainer, parent); EscherOptRecord opt = (EscherOptRecord)Shape.getEscherChild(spContainer, EscherOptRecord.RECORD_ID);
EscherProperty prop = Shape.getEscherProperty(opt, EscherProperties.BLIP__PICTUREID);
if(prop != null)
shape = new OLEShape(spContainer, parent); //presence of BLIP__PICTUREID indicates it is an embedded object
else
shape = new Picture(spContainer, parent);
break; break;
}
case ShapeTypes.Line: case ShapeTypes.Line:
shape = new Line(spContainer, parent); shape = new Line(spContainer, parent);
break; break;
case ShapeTypes.NotPrimitive: case ShapeTypes.NotPrimitive: {
EscherOptRecord opt = (EscherOptRecord)Shape.getEscherChild(spContainer, EscherOptRecord.RECORD_ID); EscherOptRecord opt = (EscherOptRecord)Shape.getEscherChild(spContainer, EscherOptRecord.RECORD_ID);
EscherProperty prop = Shape.getEscherProperty(opt, EscherProperties.GEOMETRY__VERTICES); EscherProperty prop = Shape.getEscherProperty(opt, EscherProperties.GEOMETRY__VERTICES);
if(prop != null) if(prop != null)
@ -93,6 +99,7 @@ public class ShapeFactory {
shape = new AutoShape(spContainer, parent); shape = new AutoShape(spContainer, parent);
} }
break; break;
}
default: default:
shape = new AutoShape(spContainer, parent); shape = new AutoShape(spContainer, parent);
break; break;

View File

@ -30,7 +30,10 @@ import org.apache.poi.util.LittleEndian;
* *
* @author Daniel Noll * @author Daniel Noll
*/ */
public class ExOleObjStg extends RecordAtom { public class ExOleObjStg extends RecordAtom implements PersistRecord {
private int _persistId; // Found from PersistPtrHolder
/** /**
* Record header. * Record header.
*/ */
@ -109,4 +112,19 @@ public class ExOleObjStg extends RecordAtom {
out.write(_header); out.write(_header);
out.write(_data); out.write(_data);
} }
/**
 * Returns the persist ID of this record, i.e. the identifier under which
 * it was found in a PersistPtrHolder.
 *
 * @return the value most recently stored through {@link #setPersistId(int)}
 */
public int getPersistId() {
    return this._persistId;
}
/**
 * Stores the persist ID of this record, as found from a PersistPtrHolder.
 *
 * @param id the persist ID to remember for this record
 */
public void setPersistId(int id) {
    this._persistId = id;
}
} }

View File

@ -0,0 +1,37 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
/**
 * Contract for records that can be referenced in PersistPtr storage
 * and therefore carry a persist ID.
 *
 * @author Yegor Kozlov
 */
public interface PersistRecord {

    /**
     * Returns the persist ID of this record.
     *
     * @return the persist ID
     */
    int getPersistId();

    /**
     * Assigns the persist ID of this record.
     *
     * @param id the persist ID
     */
    void setPersistId(int id);
}

View File

@ -48,4 +48,13 @@ public class ObjectData {
public InputStream getData() { public InputStream getData() {
return storage.getData(); return storage.getData();
} }
/**
 * Gives access to the underlying record that holds the object data.
 *
 * @return the <code>ExOleObjStg</code> record backing this object
 */
public ExOleObjStg getExOleObjStg() {
    return this.storage;
}
} }

View File

@ -487,7 +487,13 @@ public class SlideShow
public PictureData[] getPictureData() { public PictureData[] getPictureData() {
return _hslfSlideShow.getPictures(); return _hslfSlideShow.getPictures();
} }
/**
 * Returns the data of all the embedded OLE objects in the SlideShow,
 * as reported by the underlying <code>HSLFSlideShow</code>.
 *
 * @return the embedded OLE objects
 */
public ObjectData[] getEmbeddedObjects() {
    ObjectData[] embedded = _hslfSlideShow.getEmbeddedObjects();
    return embedded;
}
/** /**
* Return the current page size * Return the current page size
*/ */

View File

@ -21,10 +21,19 @@
package org.apache.poi.hslf.model; package org.apache.poi.hslf.model;
import java.io.*; import java.io.*;
import java.util.List;
import java.util.ArrayList;
import org.apache.poi.hslf.HSLFSlideShow; import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.ObjectData; import org.apache.poi.hslf.usermodel.ObjectData;
import org.apache.poi.hslf.usermodel.PictureData; import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Paragraph;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -59,4 +68,41 @@ public class TestOleEmbedding extends TestCase
slideShow.close(); slideShow.close();
} }
} }
/**
 * Checks that OLE shapes on the first slide of <code>ole2-embedding-2003.ppt</code>
 * expose their embedded data, and that the data can be opened as an
 * HSSF workbook or an HWPF document depending on the instance name.
 */
public void testOLEShape() throws Exception {
    String dirname = System.getProperty("HSLF.testdata.path");
    File file = new File(dirname, "ole2-embedding-2003.ppt");

    SlideShow ppt;
    FileInputStream is = new FileInputStream(file);
    try {
        ppt = new SlideShow(is);
    } finally {
        // release the file handle even when the slide show fails to load
        is.close();
    }

    Slide slide = ppt.getSlides()[0];
    Shape[] sh = slide.getShapes();
    int cnt = 0;
    for (int i = 0; i < sh.length; i++) {
        if(sh[i] instanceof OLEShape){
            cnt++;
            OLEShape ole = (OLEShape)sh[i];
            ObjectData data = ole.getObjectData();
            if("Worksheet".equals(ole.getInstanceName())){
                //Voila! we created a workbook from the embedded OLE data
                HSSFWorkbook wb = new HSSFWorkbook(data.getData());
                HSSFSheet sheet = wb.getSheetAt(0);
                //verify we can access the xls data
                assertEquals(1, sheet.getRow(0).getCell((short)0).getNumericCellValue(), 0);
                assertEquals(1, sheet.getRow(1).getCell((short)0).getNumericCellValue(), 0);
                assertEquals(2, sheet.getRow(2).getCell((short)0).getNumericCellValue(), 0);
                assertEquals(3, sheet.getRow(3).getCell((short)0).getNumericCellValue(), 0);
                assertEquals(8, sheet.getRow(5).getCell((short)0).getNumericCellValue(), 0);
            } else if ("Document".equals(ole.getInstanceName())){
                //creating a HWPF document
                HWPFDocument doc = new HWPFDocument(data.getData());
                String txt = doc.getRange().getParagraph(0).text();
                assertEquals("OLE embedding is thoroughly unremarkable.\r", txt);
            }
        }
    }
    assertEquals("Expected 2 OLE shapes", 2, cnt);
}
} }