From 2d222869249ec87d4ec27ba90e9b6e44c2722774 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Mon, 18 Jun 2007 18:10:57 +0000 Subject: [PATCH] First stab at hdgf, an implementation of the visio format. Basic support for processing the equivalent of records - pointers and blocks. Now to refactor it into something sane! git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@548428 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/org/apache/poi/hdgf/HDGFDiagram.java | 287 ++++++++++++++++++ .../src/org/apache/poi/hdgf/LZW4HDGF.java | 80 +++++ .../org/apache/poi/hdgf/TestLZW4HDGF.java | 101 ++++++ 3 files changed, 468 insertions(+) create mode 100644 src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java create mode 100644 src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hdgf/TestLZW4HDGF.java diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java new file mode 100644 index 000000000..dfe46ac74 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java @@ -0,0 +1,287 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hdgf; + +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.IOException; + +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.util.LittleEndian; + +/** + * See + * http://www.redferni.uklinux.net/visio/ + * http://www.gnome.ru/projects/docs/vsdocs.html + * http://www.gnome.ru/projects/docs/slide1.png + * http://www.gnome.ru/projects/docs/slide2.png + */ +public class HDGFDiagram { + private static final String VISIO_HEADER = "Visio (TM) Drawing\r\n"; + + private POIFSFileSystem filesystem; + private byte[] _docstream; + + private short version; + private long docSize; + + private VisioPointer trailerPointer; + private PointerBlock trailer; + + public HDGFDiagram(POIFSFileSystem fs) throws IOException { + filesystem = fs; + + DocumentEntry docProps = + (DocumentEntry)filesystem.getRoot().getEntry("VisioDocument"); + + // Grab the document stream + _docstream = new byte[docProps.getSize()]; + filesystem.createDocumentInputStream("VisioDocument").read(_docstream); + + // Check it's really visio + String typeString = new String(_docstream, 0, 20); + if(! typeString.equals(VISIO_HEADER)) { + throw new IllegalArgumentException("Wasn't a valid visio document, started with " + typeString); + } + + // Grab the version number, 0x1a -> 0x1b + version = LittleEndian.getShort(_docstream, 0x1a); + // Grab the document size, 0x1c -> 0x1f + docSize = LittleEndian.getUInt(_docstream, 0x1c); + // ??? 
0x20 -> 0x23 + + // Grab the pointer to the trailer + trailerPointer = VisioPointer.getPointerAt(_docstream, 0x24); + + // And now grab the trailer + trailer = new CompressedPointerBlock(trailerPointer, _docstream); + } + + public void debug() throws IOException { + System.err.println("Trailer is at " + trailerPointer.offset); + System.err.println("Trailer has type " + trailerPointer.type); + System.err.println("Trailer has length " + trailerPointer.length); + System.err.println("Trailer has format " + trailerPointer.format); + + for(int i=0; i 0) { + System.err.println("\tContains " + pb.getPointers().length + " other pointers"); + for(int j=0; j data.length - offset) { + len = data.length - offset; + } + if(offset < 0) { len = 0; } + + contents = new byte[len]; + if(len > 0) + System.arraycopy(data, offset, contents, 0, contents.length); + + // If we're of type 20, we have child pointers + if(len > 0 && (pointer.type == 20 || pointer.destinationHasPointers())) { + // Grab the offset to the number of pointers + int nPointersAt = (int)LittleEndian.getUInt(contents, 0); + int numPointers = (int)LittleEndian.getUInt(contents, nPointersAt); + int unknownA = (int)LittleEndian.getUInt(contents, nPointersAt+4); + + pointers = new VisioPointer[numPointers]; + int pos = nPointersAt + 8; + for(int i=0; i 0 && (pointer.destinationHasStrings())) { + for(int i=0; i<64; i+=1) { + short s = LittleEndian.getShort(contents, i); + long l = LittleEndian.getUInt(contents, i); + System.err.println(i + "\t" + s + "\t" + Integer.toHexString(s)); + System.err.println(i + "\t" + l + "\t" + Long.toHexString(l)); + } + } + } + } + + /** + * A block containing lots of pointers to other blocks, that + * is itself compressed + */ + public static class CompressedPointerBlock extends PointerBlock { + protected byte[] compressedContents; + private byte[] blockHeader = new byte[4]; + + protected CompressedPointerBlock(VisioPointer pointer, byte[] data) throws IOException { + super(pointer); + + 
compressedContents = new byte[(int)pointer.length]; + System.arraycopy(data, (int)pointer.offset, compressedContents, 0, compressedContents.length); + + // Decompress + ByteArrayInputStream bais = new ByteArrayInputStream(compressedContents); + +// TIFFLZWDecoder lzw = new TIFFLZWDecoder(); +// byte[] out = new byte[4096]; +// contents = lzw.decode(compressedContents, out); + + LZW4HDGF lzw = new LZW4HDGF(); + byte[] decomp = lzw.decode(bais); + System.arraycopy(decomp, 0, blockHeader, 0, 4); + processData(decomp, 4, decomp.length-4); + } + } + + /** + * A visio pointer, for visio versions 6+ + */ + public static class VisioPointer { + private int type; + private long address; + private long offset; + private long length; + private short format; + + /** + * @return true when format is in the range 0x40 (inclusive) to 0x50 (exclusive) + */ + public boolean destinationHasStrings() { + return (0x40 <= format && format < 0x50); + } + /** + * @return true when format is 0x1d or 0x1e, or is in the range 0x50 (inclusive) to 0x60 (exclusive) + */ + public boolean destinationHasPointers() { + if(format == 0x1d || format == 0x1e) return true; + return (0x50 <= format && format < 0x60); + } + /** + * The upper bound used to be 0xd0, the same as the lower bound, which gave an empty range so this method could never return true. + * NOTE(review): the exclusive upper bound 0xdf is an assumption - confirm against the format notes. + * @return true when format is in the range 0xd0 (inclusive) to 0xdf (exclusive) + */ + public boolean destinationHasChunks() { + return (0xd0 <= format && format < 0xdf); + } + + /** + * @return true when the bit with value 2 is set in format + */ + public boolean destinationCompressed() { + // Apparently, it's the second least significant bit + return (format & 2) > 0; + } + + /** + * Builds a pointer from the little endian fields found at the given offset: type (int, +0), address (unsigned int, +4), offset (unsigned int, +8), length (unsigned int, +12) and format (short, +16). + * @param data the bytes to read the pointer from + * @param offset the position in data at which the pointer starts + * @return the newly populated pointer + */ + public static VisioPointer getPointerAt(byte[] data, int offset) { + VisioPointer p = new VisioPointer(); + p.type = LittleEndian.getInt(data, offset+0); + p.address = LittleEndian.getUInt(data, offset+4); + p.offset = LittleEndian.getUInt(data, offset+8); + p.length = LittleEndian.getUInt(data, offset+12); + p.format = LittleEndian.getShort(data, offset+16); + + return p; + } + } +} diff --git a/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java b/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java new file mode 100644 index 000000000..ad4e6de9a --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hdgf/LZW4HDGF.java @@ -0,0 +1,80 @@ +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 
as published by + * the Free Software Foundation; version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +package org.apache.poi.hdgf; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * A decoder for the crazy LZW implementation used + * in Visio. + * This is a port of vsd_inflate.c from vsdump + * (http://www.gnome.ru/projects/vsdump_en.html) + */ +public class LZW4HDGF { + +public byte fromInt(int b) { + if(b < 128) return (byte)b; + return (byte)(b - 256); +} + +public byte[] decode(InputStream src) throws IOException { + ByteArrayOutputStream res = new ByteArrayOutputStream(); + int pos = 0; + int flag; + byte[] buffer = new byte[4096]; + buffer[0] = 0; + + byte data; + int tmp; + int addr1, addr2; + int len, pntr; + + while ( (flag = src.read()) != -1 ) { + for (int mask = 1; mask < 0x100 ; mask <<= 1) { + if ( (flag & mask) > 0) { + if( (tmp = src.read()) != -1) { + buffer[(pos&4095)] = fromInt(tmp); + pos++; + res.write( new byte[] {fromInt(tmp)} ); + } + } else { + tmp = src.read(); + if(tmp == -1) break; + addr1 = tmp; + + tmp = src.read(); + if(tmp == -1) break; + addr2 = tmp; + + len = (addr2 & 15) + 3; + pntr = (addr2 & 240)*16 + addr1; + + if(pntr > 4078) { + pntr = pntr - 4078; + } else { + pntr = pntr + 18; + } + + for(int i=0; i