From 1cd1e8546f5c733cd13001df5f5b2dccb60e6885 Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Wed, 16 Apr 2008 11:57:15 +0000 Subject: [PATCH] Support for getting OLE object data from slide show git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@648674 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + .../content/xdocs/hslf/how-to-shapes.xml | 109 ++++++++++- src/documentation/content/xdocs/status.xml | 1 + .../org/apache/poi/hslf/HSLFSlideShow.java | 20 +- .../org/apache/poi/hslf/model/OLEShape.java | 171 ++++++++++++++++++ .../src/org/apache/poi/hslf/model/Shape.java | 2 +- .../apache/poi/hslf/model/ShapeFactory.java | 13 +- .../apache/poi/hslf/record/ExOleObjStg.java | 20 +- .../apache/poi/hslf/record/PersistRecord.java | 37 ++++ .../apache/poi/hslf/usermodel/ObjectData.java | 9 + .../apache/poi/hslf/usermodel/SlideShow.java | 8 +- .../poi/hslf/model/TestOleEmbedding.java | 46 +++++ 12 files changed, 412 insertions(+), 25 deletions(-) create mode 100755 src/scratchpad/src/org/apache/poi/hslf/model/OLEShape.java create mode 100755 src/scratchpad/src/org/apache/poi/hslf/record/PersistRecord.java diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 817bd818b..0b3a7005b 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + HSLF: Support for getting OLE object data from slide show HSLF: Implemented more methods in PPGraphics2D HSLF: Added Freeform shape which can contain both lines and Bezier curves 41071 - Improved text extraction in HSLF diff --git a/src/documentation/content/xdocs/hslf/how-to-shapes.xml b/src/documentation/content/xdocs/hslf/how-to-shapes.xml index a94ab41fa..eec5ffcaf 100644 --- a/src/documentation/content/xdocs/hslf/how-to-shapes.xml +++ b/src/documentation/content/xdocs/hslf/how-to-shapes.xml @@ -41,6 +41,9 @@
  • Hyperlinks
  • Tables
  • How to remove shapes
  • +
  • How to retrieve embedded OLE objects
  • +
  • How to create shapes of arbitrary geometry
  • +
  • Shapes and Graphics2D
  • Features @@ -81,14 +84,8 @@
    How to get shapes contained in a particular slide -

    The superclass of all shapes in HSLF is the Shape class - the elemental object that composes a drawing. - The following pictute shows the class tree of HSLF shapes: -

    - Class Tree of HSLF Shapes -

    -

    - The following fragment demonstrates how to iterate over shapes for each slide. + The following code demonstrates how to iterate over shapes for each slide.

    SlideShow ppt = new SlideShow(new HSLFSlideShow("slideshow.ppt")); @@ -456,8 +453,104 @@ }
    + +
    How to retrieve embedded OLE objects + -
    + Shape[] shape = slide.getShapes(); + for (int i = 0; i < shape.length; i++) { + if (shape[i] instanceof OLEShape) { + OLEShape ole = (OLEShape) shape[i]; + ObjectData data = ole.getObjectData(); + String name = ole.getInstanceName(); + if ("Worksheet".equals(name)) { + HSSFWorkbook wb = new HSSFWorkbook(data.getData()); + } else if ("Document".equals(name)) { + HWPFDocument doc = new HWPFDocument(data.getData()); + } + } + } + + + + +
    How to create shapes of arbitrary geometry + + + SlideShow ppt = new SlideShow(); + Slide slide = ppt.createSlide(); + + java.awt.geom.GeneralPath path = new java.awt.geom.GeneralPath(); + path.moveTo(100, 100); + path.lineTo(200, 100); + path.curveTo(50, 45, 134, 22, 78, 133); + path.curveTo(10, 45, 134, 56, 78, 100); + path.lineTo(100, 200); + path.closePath(); + + Freeform shape = new Freeform(); + shape.setPath(path); + slide.addShape(shape); + +
    + + +
    How to draw into a slide using Graphics2D + + Current implementation of the PowerPoint Graphics2D driver is not fully compliant with the java.awt.Graphics2D specification. + Some features like clipping, drawing of images are not yet supported. + + + SlideShow ppt = new SlideShow(); + Slide slide = ppt.createSlide(); + + //draw a simple bar graph + //bar chart data. The first value is the bar color, the second is the width + Object[] def = new Object[]{ + Color.yellow, new Integer(100), + Color.green, new Integer(150), + Color.gray, new Integer(75), + Color.red, new Integer(200), + }; + + //all objects are drawn into a shape group so we need to create one + + ShapeGroup group = new ShapeGroup(); + //define position of the drawing in the slide + Rectangle bounds = new java.awt.Rectangle(200, 100, 350, 300); + //if you want to draw in the entire slide area then define the anchor as follows: + //Dimension pgsize = ppt.getPageSize(); + //java.awt.Rectangle bounds = new java.awt.Rectangle(0, 0, pgsize.width, pgsize.height); + + group.setAnchor(bounds); + slide.addShape(group); + + //draw a simple bar chart + Graphics2D graphics = new PPGraphics2D(group); + int x = bounds.x + 50, y = bounds.y + 50; + graphics.setFont(new Font("Arial", Font.BOLD, 10)); + for (int i = 0, idx = 1; i < def.length; i+=2, idx++) { + graphics.setColor(Color.black); + int width = ((Integer)def[i+1]).intValue(); + graphics.drawString("Q" + idx, x-20, y+20); + graphics.drawString(width + "%", x + width + 10, y + 20); + graphics.setColor((Color)def[i]); + graphics.fill(new Rectangle(x, y, width, 30)); + y += 40; + } + graphics.setColor(Color.black); + graphics.setFont(new Font("Arial", Font.BOLD, 14)); + graphics.draw(bounds); + graphics.drawString("Performance", x + 70, y + 40); + + FileOutputStream out = new FileOutputStream("hslf-graphics2d.ppt"); + ppt.write(out); + out.close(); + + +
    + + diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 6fb6c35c0..9e3405591 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + HSLF: Support for getting OLE object data from slide show HSLF: Implemented more methods in PPGraphics2D HSLF: Added Freeform shape which can contain both lines and Bezier curves 41071 - Improved text extraction in HSLF diff --git a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java index 29f01b315..dc967fd5b 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java @@ -27,22 +27,13 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Hashtable; -import java.util.Iterator; -import java.util.List; +import java.util.*; import org.apache.poi.POIDocument; import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException; import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException; import org.apache.poi.hslf.exceptions.HSLFException; -import org.apache.poi.hslf.record.CurrentUserAtom; -import org.apache.poi.hslf.record.ExOleObjStg; -import org.apache.poi.hslf.record.PersistPtrHolder; -import org.apache.poi.hslf.record.PositionDependentRecord; -import org.apache.poi.hslf.record.Record; -import org.apache.poi.hslf.record.UserEditAtom; +import org.apache.poi.hslf.record.*; import org.apache.poi.hslf.usermodel.ObjectData; import org.apache.poi.hslf.usermodel.PictureData; import org.apache.poi.hslf.model.Shape; @@ -253,6 +244,7 @@ public class HSLFSlideShow extends POIDocument private Record[] read(byte[] docstream, int usrOffset){ ArrayList lst = new ArrayList(); + HashMap offset2id = new HashMap(); while (usrOffset != 
0){ UserEditAtom usr = (UserEditAtom) Record.buildRecordAtOffset(docstream, usrOffset); lst.add(new Integer(usrOffset)); @@ -266,6 +258,7 @@ public class HSLFSlideShow extends POIDocument Integer offset = (Integer)entries.get(id); lst.add(offset); + offset2id.put(offset, id); } usrOffset = usr.getLastUserEditAtomOffset(); @@ -278,6 +271,11 @@ public class HSLFSlideShow extends POIDocument for (int i = 0; i < a.length; i++) { Integer offset = (Integer)a[i]; rec[i] = (Record)Record.buildRecordAtOffset(docstream, offset.intValue()); + if(rec[i] instanceof PersistRecord) { + PersistRecord psr = (PersistRecord)rec[i]; + Integer id = (Integer)offset2id.get(offset); + psr.setPersistId(id.intValue()); + } } return rec; diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/OLEShape.java b/src/scratchpad/src/org/apache/poi/hslf/model/OLEShape.java new file mode 100755 index 000000000..908dc4d9b --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/model/OLEShape.java @@ -0,0 +1,171 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +package org.apache.poi.hslf.model; + +import org.apache.poi.ddf.*; +import org.apache.poi.hslf.usermodel.PictureData; +import org.apache.poi.hslf.usermodel.SlideShow; +import org.apache.poi.hslf.usermodel.ObjectData; +import org.apache.poi.hslf.record.Document; +import org.apache.poi.hslf.record.ExObjList; +import org.apache.poi.hslf.record.Record; +import org.apache.poi.hslf.record.ExEmbed; +import org.apache.poi.hslf.blip.Bitmap; +import org.apache.poi.util.POILogger; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.awt.*; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.List; +import java.util.Arrays; + + +/** + * A shape representing an embedded OLE object. + * + * @author Yegor Kozlov + */ +public class OLEShape extends Picture { + protected ExEmbed _exEmbed; + + /** + * Create a new OLEShape + * + * @param idx the index of the picture + */ + public OLEShape(int idx){ + super(idx); + } + + /** + * Create a new OLEShape + * + * @param idx the index of the picture + * @param parent the parent shape + */ + public OLEShape(int idx, Shape parent) { + super(idx, parent); + } + + /** + * Create an OLEShape object + * + * @param escherRecord the EscherSpContainer record which holds information about + * this picture in the Slide + * @param parent the parent shape of this picture + */ + protected OLEShape(EscherContainerRecord escherRecord, Shape parent){ + super(escherRecord, parent); + } + + /** + * Returns unique identifier for the OLE object. + * + * @return the unique identifier for the OLE object + */ + public int getObjectID(){ + return getEscherProperty(EscherProperties.BLIP__PICTUREID); + } + + /** + * Returns the data of the embedded OLE object.
+ * + * @return the object data, or null if no matching storage record is found + */ + public ObjectData getObjectData(){ + SlideShow ppt = getSheet().getSlideShow(); + ObjectData[] ole = ppt.getEmbeddedObjects(); + + //persist reference + int ref = getExEmbed().getExOleObjAtom().getObjStgDataRef(); + for (int i = 0; i < ole.length; i++) { + if(ole[i].getExOleObjStg().getPersistId() == ref) return ole[i]; + + } + logger.log(POILogger.WARN, "OLE data not found"); + return null; + } + + /** + * Return the record container for this embedded object. + * + *

    + * It contains: + * 1. ExEmbedAtom.(4045) + * 2. ExOleObjAtom (4035) + * 3. CString (4026), Instance MenuName (1) used for menus and the Links dialog box. + * 4. CString (4026), Instance ProgID (2) that stores the OLE Programmatic Identifier. + * A ProgID is a string that uniquely identifies a given object. + * 5. CString (4026), Instance ClipboardName (3) that appears in the paste special dialog. + * 6. MetaFile( 4033), optional + *

    + * @return the ExEmbed record container, or null if it is not found + */ + public ExEmbed getExEmbed(){ + if(_exEmbed == null){ + SlideShow ppt = getSheet().getSlideShow(); + + ExObjList lst = ppt.getDocumentRecord().getExObjList(); + if(lst == null){ + logger.log(POILogger.WARN, "ExObjList not found"); + return null; + } + + int id = getObjectID(); + Record[] ch = lst.getChildRecords(); + for (int i = 0; i < ch.length; i++) { + if(ch[i] instanceof ExEmbed){ + ExEmbed embd = (ExEmbed)ch[i]; + if( embd.getExOleObjAtom().getObjID() == id) _exEmbed = embd; + } + } + } + return _exEmbed; + } + + /** + * Returns the instance name of the embedded object, e.g. "Document" or "Worksheet". + * + * @return the instance name of the embedded object + */ + public String getInstanceName(){ + return getExEmbed().getMenuName(); + } + + /** + * Returns the full name of the embedded object, + * e.g. "Microsoft Word Document" or "Microsoft Office Excel Worksheet". + * + * @return the full name of the embedded object + */ + public String getFullName(){ + return getExEmbed().getClipboardName(); + } + + /** + * Returns the ProgID that stores the OLE Programmatic Identifier. + * A ProgID is a string that uniquely identifies a given object, for example, + * "Word.Document.8" or "Excel.Sheet.8". + * + * @return the ProgID + */ + public String getProgID(){ + return getExEmbed().getProgId(); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/Shape.java b/src/scratchpad/src/org/apache/poi/hslf/model/Shape.java index f93392a9a..7b67c35fb 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/model/Shape.java +++ b/src/scratchpad/src/org/apache/poi/hslf/model/Shape.java @@ -294,7 +294,7 @@ public abstract class Shape { public int getEscherProperty(short propId){ EscherOptRecord opt = (EscherOptRecord)getEscherChild(_escherContainer, EscherOptRecord.RECORD_ID); EscherSimpleProperty prop = (EscherSimpleProperty)getEscherProperty(opt, propId); - return prop == null ? 0 : prop.getPropertyNumber(); + return prop == null ?
0 : prop.getPropertyValue(); } /** diff --git a/src/scratchpad/src/org/apache/poi/hslf/model/ShapeFactory.java b/src/scratchpad/src/org/apache/poi/hslf/model/ShapeFactory.java index 76b43ebb8..4abc8c1c0 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/model/ShapeFactory.java +++ b/src/scratchpad/src/org/apache/poi/hslf/model/ShapeFactory.java @@ -76,13 +76,19 @@ public class ShapeFactory { case ShapeTypes.TextBox: shape = new TextBox(spContainer, parent); break; - case ShapeTypes.PictureFrame: - shape = new Picture(spContainer, parent); + case ShapeTypes.PictureFrame: { + EscherOptRecord opt = (EscherOptRecord)Shape.getEscherChild(spContainer, EscherOptRecord.RECORD_ID); + EscherProperty prop = Shape.getEscherProperty(opt, EscherProperties.BLIP__PICTUREID); + if(prop != null) + shape = new OLEShape(spContainer, parent); //presence of BLIP__PICTUREID indicates it is an embedded object + else + shape = new Picture(spContainer, parent); break; + } case ShapeTypes.Line: shape = new Line(spContainer, parent); break; - case ShapeTypes.NotPrimitive: + case ShapeTypes.NotPrimitive: { EscherOptRecord opt = (EscherOptRecord)Shape.getEscherChild(spContainer, EscherOptRecord.RECORD_ID); EscherProperty prop = Shape.getEscherProperty(opt, EscherProperties.GEOMETRY__VERTICES); if(prop != null) @@ -93,6 +99,7 @@ public class ShapeFactory { shape = new AutoShape(spContainer, parent); } break; + } default: shape = new AutoShape(spContainer, parent); break; diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/ExOleObjStg.java b/src/scratchpad/src/org/apache/poi/hslf/record/ExOleObjStg.java index 7c4742792..197497fd0 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/record/ExOleObjStg.java +++ b/src/scratchpad/src/org/apache/poi/hslf/record/ExOleObjStg.java @@ -30,7 +30,10 @@ import org.apache.poi.util.LittleEndian; * * @author Daniel Noll */ -public class ExOleObjStg extends RecordAtom { +public class ExOleObjStg extends RecordAtom implements PersistRecord { + + private 
int _persistId; // Found from PersistPtrHolder + /** * Record header. */ @@ -109,4 +112,19 @@ public class ExOleObjStg extends RecordAtom { out.write(_header); out.write(_data); } + + /** + * Fetch our persist ID, as found from a PersistPtrHolder. + * Should match the ObjStgDataRef of our matching ExOleObjAtom + */ + public int getPersistId() { + return _persistId; + } + + /** + * Set our persist ID, as found from a PersistPtrHolder + */ + public void setPersistId(int id) { + _persistId = id; + } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/PersistRecord.java b/src/scratchpad/src/org/apache/poi/hslf/record/PersistRecord.java new file mode 100755 index 000000000..8238d10fb --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/PersistRecord.java @@ -0,0 +1,37 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hslf.record; + +/** + * A record that can be referenced in PersistPtr storage.
+ * + * @author Yegor Kozlov + */ +public interface PersistRecord { + + /** + * Fetch the persist ID + */ + public int getPersistId(); + + /** + * Set the persist ID + */ + public void setPersistId(int id); +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/ObjectData.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/ObjectData.java index 66b4f0373..957e78868 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/ObjectData.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/ObjectData.java @@ -48,4 +48,13 @@ public class ObjectData { public InputStream getData() { return storage.getData(); } + + /** + * Return the record that contains the object data. + * + * @return the record that contains the object data. + */ + public ExOleObjStg getExOleObjStg() { + return storage; + } } diff --git a/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java index 21e30ac2a..418dc3d2f 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java @@ -487,7 +487,13 @@ public class SlideShow public PictureData[] getPictureData() { return _hslfSlideShow.getPictures(); } - + + /** + * Returns the data of all the embedded OLE objects in the SlideShow + */ + public ObjectData[] getEmbeddedObjects() { + return _hslfSlideShow.getEmbeddedObjects(); + } /** * Return the current page size */ diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/model/TestOleEmbedding.java b/src/scratchpad/testcases/org/apache/poi/hslf/model/TestOleEmbedding.java index 7be6f9dc3..cb177048b 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/model/TestOleEmbedding.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/model/TestOleEmbedding.java @@ -21,10 +21,19 @@ package org.apache.poi.hslf.model; import java.io.*; +import java.util.List; +import java.util.ArrayList; import
org.apache.poi.hslf.HSLFSlideShow; import org.apache.poi.hslf.usermodel.ObjectData; import org.apache.poi.hslf.usermodel.PictureData; +import org.apache.poi.hslf.usermodel.SlideShow; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.usermodel.Range; +import org.apache.poi.hwpf.usermodel.Paragraph; import junit.framework.TestCase; @@ -59,4 +68,41 @@ public class TestOleEmbedding extends TestCase slideShow.close(); } } + + public void testOLEShape() throws Exception { + String dirname = System.getProperty("HSLF.testdata.path"); + File file = new File(dirname, "ole2-embedding-2003.ppt"); + FileInputStream is = new FileInputStream(file); + SlideShow ppt = new SlideShow(is); + is.close(); + + Slide slide = ppt.getSlides()[0]; + Shape[] sh = slide.getShapes(); + int cnt = 0; + for (int i = 0; i < sh.length; i++) { + if(sh[i] instanceof OLEShape){ + cnt++; + OLEShape ole = (OLEShape)sh[i]; + ObjectData data = ole.getObjectData(); + if("Worksheet".equals(ole.getInstanceName())){ + //Voila! 
we created a workbook from the embedded OLE data + HSSFWorkbook wb = new HSSFWorkbook(data.getData()); + HSSFSheet sheet = wb.getSheetAt(0); + //verify we can access the xls data + assertEquals(1, sheet.getRow(0).getCell((short)0).getNumericCellValue(), 0); + assertEquals(1, sheet.getRow(1).getCell((short)0).getNumericCellValue(), 0); + assertEquals(2, sheet.getRow(2).getCell((short)0).getNumericCellValue(), 0); + assertEquals(3, sheet.getRow(3).getCell((short)0).getNumericCellValue(), 0); + assertEquals(8, sheet.getRow(5).getCell((short)0).getNumericCellValue(), 0); + } else if ("Document".equals(ole.getInstanceName())){ + //creating a HWPF document + HWPFDocument doc = new HWPFDocument(data.getData()); + String txt = doc.getRange().getParagraph(0).text(); + assertEquals("OLE embedding is thoroughly unremarkable.\r", txt); + } + } + + } + assertEquals("Expected 2 OLE shapes", 2, cnt); + } }