From e00185760def2f43ac50e0718c3cfc46fcc5a088 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 8 Jul 2005 15:57:07 +0000 Subject: [PATCH] Record level dumper of powerpoint files, and their constituent escher parts Prints out in indented text what occurs where (Code has existed for a while, but the lookup table used to be just a cut'n'paste from something of unknown copyright status. Now converted to using Yegor's new RecordTypes class, so can be released) git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353743 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/hslf/dev/SlideShowDumper.java | 412 ++++++++++++++++++ 1 file changed, 412 insertions(+) create mode 100644 src/scratchpad/src/org/apache/poi/hslf/dev/SlideShowDumper.java diff --git a/src/scratchpad/src/org/apache/poi/hslf/dev/SlideShowDumper.java b/src/scratchpad/src/org/apache/poi/hslf/dev/SlideShowDumper.java new file mode 100644 index 000000000..e90ba097a --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/dev/SlideShowDumper.java @@ -0,0 +1,412 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */
+
+
+
+package org.apache.poi.hslf.dev;
+
+import java.util.*;
+import java.io.*;
+
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSDocument;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+
+import org.apache.poi.ddf.*;
+import org.apache.poi.hslf.record.RecordTypes;
+
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * This class provides a way to "peek" inside a PowerPoint file. It
+ *  prints out every record type it finds and, for those it knows are
+ *  not atoms, what they contain.
+ *
+ * To figure out what things are, and whether they are atoms or not, it
+ *  uses the list in hslf.record.RecordTypes.
+ *
+ * To peek inside PPDrawings, which hold Escher drawings, we use the
+ *  DDF package from POI (but we can fake it by using the Escher listings
+ *  from hslf.record.RecordTypes also)
+ *
+ * @author Nick Burch
+ */
+
+public class SlideShowDumper
+{
+    private InputStream istream;
+    private POIFSFileSystem filesystem;
+
+    private byte[] _docstream;
+
+    /** Do we try to use DDF to understand the escher objects? */
+    private boolean ddfEscher = false;
+    /** Do we use our own built-in basic escher groker to understand the escher objects? */
+    private boolean basicEscher = false;
+
+    /**
+     * Command line entry point: dumps the PowerPoint file named on the
+     *  command line.
+     *
+     * With one argument, that argument is the file to dump and escher
+     *  records are not expanded. With two or more, the first argument
+     *  selects the escher mode ("-escher", compared ignoring case, picks
+     *  DDF; anything else picks the basic groker) and the second is the
+     *  file to dump.
+     *
+     * @param args optional escher option followed by the file name
+     * @throws IOException if the file cannot be opened, read or closed
+     */
+    public static void main(String args[]) throws IOException
+    {
+        if(args.length == 0) {
+            System.err.println("Usage: SlideShowDumper [-escher|-basicescher] filename");
+            return;
+        }
+
+        // When an option is given it comes first, pushing the file name
+        //  along to the second position
+        String filename = args[0];
+        if(args.length > 1) {
+            filename = args[1];
+        }
+
+        SlideShowDumper foo = new SlideShowDumper(filename);
+        try {
+            if(args.length > 1) {
+                if(args[0].equalsIgnoreCase("-escher")) {
+                    foo.setDDFEscher(true);
+                } else {
+                    foo.setBasicEscher(true);
+                }
+            }
+
+            foo.printDump();
+        } finally {
+            // Release the input file even if the dump fails part way through
+            foo.close();
+        }
+    }
+
+
+    /**
+     * Constructs a Powerpoint dump from fileName. 
Parses the document
+     * so that it can be dumped; nothing is printed until printDump()
+     * is called
+     *
+     * @param fileName The name of the file to read.
+     * @throws IOException if there is a problem while parsing the document.
+     */
+    public SlideShowDumper(String fileName) throws IOException
+    {
+        this(new FileInputStream(fileName));
+    }
+
+    /**
+     * Constructs a Powerpoint dump from an input stream. Parses the
+     * document so that it can be dumped; nothing is printed until
+     * printDump() is called. The stream is remembered, and is closed
+     * by close()
+     *
+     * @param inputStream the source of the data
+     * @throws IOException if there is a problem while parsing the document.
+     */
+    public SlideShowDumper(InputStream inputStream) throws IOException
+    {
+        //do Ole stuff
+        this(new POIFSFileSystem(inputStream));
+        istream = inputStream;
+    }
+
+    /**
+     * Constructs a Powerpoint dump from a POIFS Filesystem. Loads the
+     * whole "PowerPoint Document" stream into memory; nothing is printed
+     * until printDump() is called
+     *
+     * @param filesystem the POIFS FileSystem to read from
+     * @throws IOException if there is a problem while parsing the document,
+     *  or if the document stream ends before its declared size is reached
+     */
+    public SlideShowDumper(POIFSFileSystem filesystem) throws IOException
+    {
+        this.filesystem = filesystem;
+
+        // Get the main document stream
+        DocumentEntry docProps =
+            (DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document");
+
+        // Grab the document stream. A single read() call is allowed to
+        //  hand back fewer bytes than asked for, so keep going until the
+        //  buffer is full, and always give the stream back afterwards
+        _docstream = new byte[docProps.getSize()];
+        InputStream docIn = filesystem.createDocumentInputStream("PowerPoint Document");
+        try {
+            int filled = 0;
+            while(filled < _docstream.length) {
+                int count = docIn.read(_docstream, filled, _docstream.length - filled);
+                if(count < 0) {
+                    throw new EOFException(
+                        "PowerPoint Document stream ended after " + filled +
+                        " of " + _docstream.length + " bytes");
+                }
+                filled += count;
+            }
+        } finally {
+            docIn.close();
+        }
+    }
+
+    /**
+     * Control dumping of any Escher records found - should DDF be used?
+     * The two escher modes are tied together: passing true selects DDF
+     * and switches the basic groker off, passing false does the reverse.
+     *
+     * @param grok true to dump escher records using DDF
+     */
+    public void setDDFEscher(boolean grok) {
+        ddfEscher = grok;
+        basicEscher = !(grok);
+    }
+
+    /**
+     * Control dumping of any Escher records found - should our built in
+     * basic groker be used?
+     * The two escher modes are tied together: passing true selects the
+     * basic groker and switches DDF off, passing false does the reverse.
+     *
+     * @param grok true to dump escher records using the basic groker
+     */
+    public void setBasicEscher(boolean grok) {
+        basicEscher = grok;
+        ddfEscher = !(grok);
+    }
+
+    /**
+     * Shuts things down. 
Closes underlying streams etc
+     * Only an input stream handed to (or opened by) a constructor is
+     * closed; the reference to the POIFS filesystem is simply dropped.
+     *
+     * @throws IOException if closing the input stream fails
+     */
+    public void close() throws IOException
+    {
+        if(istream != null) {
+            istream.close();
+        }
+        filesystem = null;
+    }
+
+
+    /**
+     * Dumps the whole document stream, by walking it with walkTree
+     * from offset 0 for its full length.
+     */
+    public void printDump() {
+        // The format of records in a powerpoint file is:
+        //   [2 bytes, little endian: "info"]
+        //   [2 bytes, little endian: "type"]
+        //   [4 bytes, little endian: "length"]
+        // If it has a zero length, following it will be another record
+        //   [ii ii tt tt 00 00 00 00] [ii ii tt tt ll ll ll ll]
+        // If it has a length, depending on its type it may have children or data
+        // If it has children, these will follow straight away
+        //   [ii ii tt tt ll ll ll ll [ii ii tt tt ll ll ll ll]]
+        // If it has data, this will come straight after, and run for the length
+        //   [ii ii tt tt ll ll ll ll dd dd dd dd dd dd dd]
+        // All lengths given exclude the 8 byte record header
+        // (Data records are known as Atoms)
+
+        // Document should start with:
+        //   0F 00 E8 03 ## ## ## ##
+        //     (type 1000 = document, info 00 0f is normal, rest is document length)
+        //   01 00 E9 03 28 00 00 00
+        //     (type 1001 = document atom, info 00 01 normal, 0x28 = 40 bytes long)
+
+        // When parsing a document, look to see if you know about that type
+        //  of the current record. If you know it's a type that has children,
+        //  process the record's data area looking for more records
+        // If you know about the type and it doesn't have children, either do
+        //  something with the data (eg TextRun) or skip over it
+        // Otherwise, check the first byte. If you do a BINARY_AND on it with
+        // 0x0f (15) and get back 0x0f, you know it has children. 
Otherwise
+        // it doesn't
+
+        walkTree(0,0,_docstream.length);
+}
+
+/**
+ * Formats a short as upper case hex, left padded with a zero to at
+ *  least two digits. The value is treated as an unsigned 16 bit
+ *  quantity, so negative shorts come out as four digits (eg -1 gives
+ *  FFFF) rather than being sign extended to eight.
+ *
+ * @param s the value to format
+ * @return the hex string, between two and four characters long
+ */
+public String makeHex(short s) {
+    // Mask before converting: widening a negative short to an int would
+    //  otherwise fill the top 16 bits with ones
+    String hex = Integer.toHexString(s & 0xFFFF).toUpperCase();
+    if(hex.length() == 1) { return "0" + hex; }
+    return hex;
+}
+/**
+ * Formats an int as upper case hex, left padded with zeros to at
+ *  least four digits.
+ *
+ * @param i the value to format
+ * @return the hex string, at least four characters long
+ */
+public String makeHex(int i) {
+    String hex = Integer.toHexString(i).toUpperCase();
+    if(hex.length() == 1) { return "000" + hex; }
+    if(hex.length() == 2) { return "00" + hex; }
+    if(hex.length() == 3) { return "0" + hex; }
+    return hex;
+}
+
+public void walkTree(int depth, int startPos, int maxLen) { + int pos = startPos; + int endPos = startPos + maxLen; + int indent = depth; + while(pos <= endPos - 8) { + long type = LittleEndian.getUShort(_docstream,pos+2); + long len = LittleEndian.getUInt(_docstream,pos+4); + byte opt = _docstream[pos]; + + String ind = ""; + for(int i=0; i 8 ) { + // Assume it has children, rather than being corrupted + walkEscherDDF((indent+3), pos + 8, (int)atomLen ); + + // Wind on our length + our header + pos += atomLen; + pos += 8; + len -= atomLen; + len -= 8; + } else { + // No children, wind on our real length + pos += atomLen; + pos += 8; + len -= atomLen; + len -= 8; + } + + // Move on to the next one, if we're not at the end yet + if(len >= 8) { + walkEscherDDF(indent, pos, len ); + } + } + + /** + * Use the basic record format groking code to walk the Escher records + */ + public void walkEscherBasic(int indent, int pos, int len) { + if(len < 8) { return; } + + String ind = ""; + for(int i=0; i