Record level dumper of powerpoint files, and their constituent escher parts
Prints out in indented text what occurs where (Code has existed for a while, but the lookup table used to be just a cut'n'paste from something of unknown copyright status. Now converted to using Yegor's new RecordTypes class, so can be released) git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353743 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3a1dcb4a85
commit
e00185760d
412
src/scratchpad/src/org/apache/poi/hslf/dev/SlideShowDumper.java
Normal file
412
src/scratchpad/src/org/apache/poi/hslf/dev/SlideShowDumper.java
Normal file
@@ -0,0 +1,412 @@
|
|||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
Copyright 2002-2004 Apache Software Foundation
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.poi.hslf.dev;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
import org.apache.poi.poifs.filesystem.POIFSDocument;
|
||||||
|
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||||
|
|
||||||
|
import org.apache.poi.ddf.*;
|
||||||
|
import org.apache.poi.hslf.record.RecordTypes;
|
||||||
|
|
||||||
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class provides a way to "peek" inside a powerpoint file. It
|
||||||
|
* will print out all the types it find, and for those it know aren't
|
||||||
|
* atoms, what they contain
|
||||||
|
*
|
||||||
|
* To figure out what things are, and if they are atoms or not, used the
|
||||||
|
* list in hslf.record.RecordTypes
|
||||||
|
*
|
||||||
|
* To peek inside PPDrawings, which hold Escher drawings, we use the
|
||||||
|
* DDF package from POI (but we can fake it by using the Escher listings
|
||||||
|
* from hslf.record.RecordTypes also)
|
||||||
|
*
|
||||||
|
* @author Nick Burch
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class SlideShowDumper
|
||||||
|
{
|
||||||
|
private InputStream istream;
|
||||||
|
private POIFSFileSystem filesystem;
|
||||||
|
|
||||||
|
private byte[] _docstream;
|
||||||
|
|
||||||
|
/** Do we try to use DDF to understand the escher objects? */
|
||||||
|
private boolean ddfEscher = false;
|
||||||
|
/** Do we use our own built-in basic escher groker to understand the escher objects? */
|
||||||
|
private boolean basicEscher = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* right now this function takes one parameter: a ppt file, and outputs
|
||||||
|
* a dump of what it contains
|
||||||
|
*/
|
||||||
|
public static void main(String args[]) throws IOException
|
||||||
|
{
|
||||||
|
if(args.length == 0) {
|
||||||
|
System.err.println("Useage: SlideShowDumper [-escher|-basicescher] <filename>");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String filename = args[0];
|
||||||
|
if(args.length > 1) {
|
||||||
|
filename = args[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
SlideShowDumper foo = new SlideShowDumper(filename);
|
||||||
|
|
||||||
|
if(args.length > 1) {
|
||||||
|
if(args[0].equalsIgnoreCase("-escher")) {
|
||||||
|
foo.setDDFEscher(true);
|
||||||
|
} else {
|
||||||
|
foo.setBasicEscher(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foo.printDump();
|
||||||
|
foo.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a Powerpoint dump from fileName. Parses the document
|
||||||
|
* and dumps out the contents
|
||||||
|
*
|
||||||
|
* @param fileName The name of the file to read.
|
||||||
|
* @throws IOException if there is a problem while parsing the document.
|
||||||
|
*/
|
||||||
|
public SlideShowDumper(String fileName) throws IOException
|
||||||
|
{
|
||||||
|
this(new FileInputStream(fileName));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a Powerpoint dump from an input stream. Parses the
|
||||||
|
* document and dumps out the contents
|
||||||
|
*
|
||||||
|
* @param inputStream the source of the data
|
||||||
|
* @throws IOException if there is a problem while parsing the document.
|
||||||
|
*/
|
||||||
|
public SlideShowDumper(InputStream inputStream) throws IOException
|
||||||
|
{
|
||||||
|
//do Ole stuff
|
||||||
|
this(new POIFSFileSystem(inputStream));
|
||||||
|
istream = inputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a Powerpoint dump from a POIFS Filesystem. Parses the
|
||||||
|
* document and dumps out the contents
|
||||||
|
*
|
||||||
|
* @param filesystem the POIFS FileSystem to read from
|
||||||
|
* @throws IOException if there is a problem while parsing the document.
|
||||||
|
*/
|
||||||
|
public SlideShowDumper(POIFSFileSystem filesystem) throws IOException
|
||||||
|
{
|
||||||
|
this.filesystem = filesystem;
|
||||||
|
|
||||||
|
// Get the main document stream
|
||||||
|
DocumentEntry docProps =
|
||||||
|
(DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document");
|
||||||
|
|
||||||
|
// Grab the document stream
|
||||||
|
_docstream = new byte[docProps.getSize()];
|
||||||
|
filesystem.createDocumentInputStream("PowerPoint Document").read(_docstream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Control dumping of any Escher records found - should DDF be used?
|
||||||
|
*/
|
||||||
|
public void setDDFEscher(boolean grok) {
|
||||||
|
ddfEscher = grok;
|
||||||
|
basicEscher = !(grok);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Control dumping of any Escher records found - should our built in
|
||||||
|
* basic groker be used?
|
||||||
|
*/
|
||||||
|
public void setBasicEscher(boolean grok) {
|
||||||
|
basicEscher = grok;
|
||||||
|
ddfEscher = !(grok);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shuts things down. Closes underlying streams etc
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void close() throws IOException
|
||||||
|
{
|
||||||
|
if(istream != null) {
|
||||||
|
istream.close();
|
||||||
|
}
|
||||||
|
filesystem = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void printDump() {
|
||||||
|
// The format of records in a powerpoint file are:
|
||||||
|
// <little endian 2 byte "info">
|
||||||
|
// <little endian 2 byte "type">
|
||||||
|
// <little endian 4 byte "length">
|
||||||
|
// If it has a zero length, following it will be another record
|
||||||
|
// <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
|
||||||
|
// If it has a length, depending on its type it may have children or data
|
||||||
|
// If it has children, these will follow straight away
|
||||||
|
// <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
|
||||||
|
// If it has data, this will come straigh after, and run for the length
|
||||||
|
// <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
|
||||||
|
// All lengths given exclude the 8 byte record header
|
||||||
|
// (Data records are known as Atoms)
|
||||||
|
|
||||||
|
// Document should start with:
|
||||||
|
// 0F 00 E8 03 ## ## ## ##
|
||||||
|
// (type 1000 = document, info 00 0f is normal, rest is document length)
|
||||||
|
// 01 00 E9 03 28 00 00 00
|
||||||
|
// (type 1001 = document atom, info 00 01 normal, 28 bytes long)
|
||||||
|
|
||||||
|
// When parsing a document, look to see if you know about that type
|
||||||
|
// of the current record. If you know it's a type that has children,
|
||||||
|
// process the record's data area looking for more records
|
||||||
|
// If you know about the type and it doesn't have children, either do
|
||||||
|
// something with the data (eg TextRun) or skip over it
|
||||||
|
// Otherwise, check the first byte. If you do a BINARY_AND on it with
|
||||||
|
// 0x0f (15) and get back 0x0f, you know it has children. Otherwise
|
||||||
|
// it doesn't
|
||||||
|
|
||||||
|
walkTree(0,0,_docstream.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String makeHex(short s) {
|
||||||
|
String hex = Integer.toHexString((int)s).toUpperCase();
|
||||||
|
if(hex.length() == 1) { return "0" + hex; }
|
||||||
|
return hex;
|
||||||
|
}
|
||||||
|
public String makeHex(int i) {
|
||||||
|
String hex = Integer.toHexString(i).toUpperCase();
|
||||||
|
if(hex.length() == 1) { return "000" + hex; }
|
||||||
|
if(hex.length() == 2) { return "00" + hex; }
|
||||||
|
if(hex.length() == 3) { return "0" + hex; }
|
||||||
|
return hex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void walkTree(int depth, int startPos, int maxLen) {
|
||||||
|
int pos = startPos;
|
||||||
|
int endPos = startPos + maxLen;
|
||||||
|
int indent = depth;
|
||||||
|
while(pos <= endPos - 8) {
|
||||||
|
long type = LittleEndian.getUShort(_docstream,pos+2);
|
||||||
|
long len = LittleEndian.getUInt(_docstream,pos+4);
|
||||||
|
byte opt = _docstream[pos];
|
||||||
|
|
||||||
|
String ind = "";
|
||||||
|
for(int i=0; i<indent; i++) { ind += " "; }
|
||||||
|
|
||||||
|
System.out.println(ind + "At position " + pos + " (" + makeHex(pos) + "):");
|
||||||
|
System.out.println(ind + "Type is " + type + " (" + makeHex((int)type) + "), len is " + len + " (" + makeHex((int)len) + ")");
|
||||||
|
|
||||||
|
// See if we know about the type of it
|
||||||
|
String recordName = RecordTypes.recordName((int)type);
|
||||||
|
|
||||||
|
// Jump over header, and think about going on more
|
||||||
|
pos += 8;
|
||||||
|
if(recordName != null) {
|
||||||
|
System.out.println(ind + "That's a " + recordName);
|
||||||
|
|
||||||
|
// Now check if it's a container or not
|
||||||
|
int container = (int)opt & 0x0f;
|
||||||
|
|
||||||
|
if(type == 0L || (container != 0x0f)) {
|
||||||
|
System.out.println();
|
||||||
|
} else if (type == 1035l || type == 1036l) {
|
||||||
|
// Special Handling of 1035=PPDrawingGroup and 1036=PPDrawing
|
||||||
|
System.out.println();
|
||||||
|
|
||||||
|
if(ddfEscher) {
|
||||||
|
// Seems to be:
|
||||||
|
walkEscherDDF((indent+3),pos+8,(int)len-8);
|
||||||
|
} else if(basicEscher) {
|
||||||
|
walkEscherBasic((indent+3),pos+8,(int)len-8);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// General container record handling code
|
||||||
|
System.out.println();
|
||||||
|
walkTree((indent+2),pos,(int)len);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
System.out.println(ind + "** unknown record **");
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
pos += (int)len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the DDF code to walk the Escher records
|
||||||
|
*/
|
||||||
|
public void walkEscherDDF(int indent, int pos, int len) {
|
||||||
|
if(len < 8) { return; }
|
||||||
|
|
||||||
|
String ind = "";
|
||||||
|
for(int i=0; i<indent; i++) { ind += " "; }
|
||||||
|
|
||||||
|
byte[] contents = new byte[len];
|
||||||
|
System.arraycopy(_docstream,pos,contents,0,len);
|
||||||
|
DefaultEscherRecordFactory erf = new DefaultEscherRecordFactory();
|
||||||
|
EscherRecord record = erf.createRecord(contents,0);
|
||||||
|
|
||||||
|
// For now, try filling in the fields
|
||||||
|
record.fillFields(contents,0,erf);
|
||||||
|
|
||||||
|
long atomType = LittleEndian.getUShort(contents,2);
|
||||||
|
// This lacks the 8 byte header size
|
||||||
|
long atomLen = LittleEndian.getUShort(contents,4);
|
||||||
|
// This (should) include the 8 byte header size
|
||||||
|
int recordLen = record.getRecordSize();
|
||||||
|
|
||||||
|
|
||||||
|
System.out.println(ind + "At position " + pos + " (" + makeHex(pos) + "):");
|
||||||
|
System.out.println(ind + "Type is " + atomType + " (" + makeHex((int)atomType) + "), len is " + atomLen + " (" + makeHex((int)atomLen) + ") (" + (atomLen+8) + ") - record claims " + recordLen);
|
||||||
|
|
||||||
|
// Check for corrupt / lying ones
|
||||||
|
if(recordLen != 8 && (recordLen != (atomLen+8))) {
|
||||||
|
System.out.println(ind + "** Atom length of " + atomLen + " (" + (atomLen+8) + ") doesn't match record length of " + recordLen);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print the record's details
|
||||||
|
if(record instanceof EscherContainerRecord) {
|
||||||
|
EscherContainerRecord ecr = (EscherContainerRecord)record;
|
||||||
|
System.out.println(ind + ecr.toString(false));
|
||||||
|
walkEscherDDF((indent+3), pos + 8, (int)atomLen );
|
||||||
|
} else {
|
||||||
|
System.out.println(ind + record.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle records that seem to lie
|
||||||
|
if(atomType == 61451l) {
|
||||||
|
// Normally claims a size of 8
|
||||||
|
recordLen = (int)atomLen + 8;
|
||||||
|
}
|
||||||
|
if(atomType == 61453l) {
|
||||||
|
// Returns EscherContainerRecord, but really msofbtClientTextbox
|
||||||
|
recordLen = (int)atomLen + 8;
|
||||||
|
record.fillFields( contents, 0, erf );
|
||||||
|
if(! (record instanceof EscherTextboxRecord)) {
|
||||||
|
System.out.println(ind + "** Really a msofbtClientTextbox !");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decide on what to do, based on how the lenghts match up
|
||||||
|
if(recordLen == 8 && atomLen > 8 ) {
|
||||||
|
// Assume it has children, rather than being corrupted
|
||||||
|
walkEscherDDF((indent+3), pos + 8, (int)atomLen );
|
||||||
|
|
||||||
|
// Wind on our length + our header
|
||||||
|
pos += atomLen;
|
||||||
|
pos += 8;
|
||||||
|
len -= atomLen;
|
||||||
|
len -= 8;
|
||||||
|
} else {
|
||||||
|
// No children, wind on our real length
|
||||||
|
pos += atomLen;
|
||||||
|
pos += 8;
|
||||||
|
len -= atomLen;
|
||||||
|
len -= 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move on to the next one, if we're not at the end yet
|
||||||
|
if(len >= 8) {
|
||||||
|
walkEscherDDF(indent, pos, len );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the basic record format groking code to walk the Escher records
|
||||||
|
*/
|
||||||
|
public void walkEscherBasic(int indent, int pos, int len) {
|
||||||
|
if(len < 8) { return; }
|
||||||
|
|
||||||
|
String ind = "";
|
||||||
|
for(int i=0; i<indent; i++) { ind += " "; }
|
||||||
|
|
||||||
|
long type = LittleEndian.getUShort(_docstream,pos+2);
|
||||||
|
long atomlen = LittleEndian.getUInt(_docstream,pos+4);
|
||||||
|
String typeS = makeHex((int)type);
|
||||||
|
|
||||||
|
System.out.println(ind + "At position " + pos + " (" + makeHex(pos) + "):");
|
||||||
|
System.out.println(ind + "Type is " + type + " (" + typeS + "), len is " + atomlen + " (" + makeHex((int)atomlen) + ")");
|
||||||
|
|
||||||
|
String typeName = RecordTypes.recordName((int)type);
|
||||||
|
if(typeName != null) {
|
||||||
|
System.out.println(ind + "That's an Escher Record: " + typeName);
|
||||||
|
} else {
|
||||||
|
System.out.println(ind + "(Unknown Escher Record)");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Code to print the first 8 bytes
|
||||||
|
// System.out.print(ind);
|
||||||
|
// for(int i=0; i<8; i++) {
|
||||||
|
// short bv = _docstream[i+pos];
|
||||||
|
// if(bv < 0) { bv += 256; }
|
||||||
|
// System.out.print(i + "=" + bv + " (" + makeHex(bv) + ") ");
|
||||||
|
// }
|
||||||
|
// System.out.println("");
|
||||||
|
|
||||||
|
// Record specific dumps
|
||||||
|
if(type == 61453l) {
|
||||||
|
// Text Box. Print out first 8 bytes of data, then 8 4 later
|
||||||
|
System.out.print(ind);
|
||||||
|
for(int i=8; i<16; i++) {
|
||||||
|
short bv = _docstream[i+pos];
|
||||||
|
if(bv < 0) { bv += 256; }
|
||||||
|
System.out.print(i + "=" + bv + " (" + makeHex(bv) + ") ");
|
||||||
|
}
|
||||||
|
System.out.println("");
|
||||||
|
System.out.print(ind);
|
||||||
|
for(int i=20; i<28; i++) {
|
||||||
|
short bv = _docstream[i+pos];
|
||||||
|
if(bv < 0) { bv += 256; }
|
||||||
|
System.out.print(i + "=" + bv + " (" + makeHex(bv) + ") ");
|
||||||
|
}
|
||||||
|
System.out.println("");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Blank line before next entry
|
||||||
|
System.out.println("");
|
||||||
|
|
||||||
|
// Look in children if we are a container
|
||||||
|
if(type == 61443l || type == 61444l) {
|
||||||
|
walkEscherBasic((indent+3), pos+8, (int)atomlen);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep going if not yet at end
|
||||||
|
if(atomlen < len) {
|
||||||
|
int atomleni = (int)atomlen;
|
||||||
|
walkEscherBasic(indent, pos+atomleni+8, len-atomleni-8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user