Replace the HDGW LZW engine with a fully documented, ASL licenced version. (Doesn't do compression yet, but is a much better start for that)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@584414 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6b70b7cfad
commit
786af85cc0
161
src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
Normal file
161
src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.hdgf;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
|
||||||
|
/**
 * A decoder for the crazy LZW implementation used
 *  in Visio.
 * According to VSDump, "it's a slightly perverted version of LZW
 *  compression, with inverted meaning of flag byte and 0xFEE as an
 *  'initial shift'". It uses 12 bit codes
 * (http://www.gnome.ru/projects/vsdump_en.html)
 *
 * As implemented here, the decoder keeps a 4096 byte circular buffer
 *  (the "dictionary") of the most recent output. The input is a series
 *  of groups, each made up of one flag byte followed by up to 8 items.
 *  An item is either a single literal byte, or a 2 byte back-reference
 *  (12 bit buffer position + 4 bit length) into the dictionary.
 *
 * Two good resources on LZW are:
 *  http://en.wikipedia.org/wiki/LZW
 *  http://marknelson.us/1989/10/01/lzw-data-compression/
 */
public class HDGFLZW {
    /** Size of the circular dictionary; positions are 12 bits wide */
    private static final int DICTIONARY_SIZE = 4096;
    /** Mask used to wrap any position back into the dictionary */
    private static final int DICTIONARY_MASK = DICTIONARY_SIZE - 1;
    /**
     * The "initial shift": a stored position of 0xFEE (4078) refers
     *  to dictionary slot 0, i.e. stored positions are offset by
     *  4096 - 0xFEE = 18.
     */
    private static final int POSITION_SHIFT = 18;
    /** A back-reference always copies at least this many bytes */
    private static final int MIN_RUN_LENGTH = 3;

    /**
     * Given an integer, turn it into a java byte, handling
     *  the wrapping.
     * This is a convenience method
     *
     * @param b the value to convert, normally between 0 and 255
     * @return the signed java byte with the same low 8 bits
     */
    public byte fromInt(int b) {
        // A narrowing cast keeps the low 8 bits, which is exactly
        //  the wrapping we are after
        return (byte) b;
    }

    /**
     * Given a java byte, turn it into an integer between 0
     *  and 255 (i.e. handle the unwrapping).
     * This is a convenience method
     *
     * @param b the signed java byte
     * @return the same 8 bits as an integer between 0 and 255
     */
    public int fromByte(byte b) {
        return b & 0xFF;
    }

    /**
     * Decompresses the given input stream, returning the array of bytes
     *  of the decompressed input.
     *
     * @param src the stream to read the compressed data from
     * @return the decompressed bytes
     * @throws IOException if reading from the stream fails
     */
    public byte[] decode(InputStream src) throws IOException {
        ByteArrayOutputStream res = new ByteArrayOutputStream();
        decode(src, res);
        return res.toByteArray();
    }

    /**
     * Perform a streaming decompression of the input.
     * Works by:
     * 1) Reading a flag byte, the 8 bits of which tell you if the
     *     following 8 codes are compressed or un-compressed
     * 2) Consider the 8 bits in turn, lowest bit first
     * 3) If the bit is set, the next code is un-compressed, so
     *     add it to the dictionary and output it
     * 4) If the bit isn't set, then read in the length and start
     *     position in the dictionary, and output the bytes there
     * 5) Loop until we've done all 8 bits, then read in the next
     *     flag byte
     *
     * Running out of input is not reported as an error: decoding
     *  simply stops, and whatever was decoded so far has been written.
     * Neither stream is closed by this method.
     *
     * @param src the stream to read the compressed data from
     * @param res the stream the decompressed bytes are written to
     * @throws IOException if reading from src or writing to res fails
     */
    public void decode(InputStream src, OutputStream res) throws IOException {
        // Our dictionary is a circular buffer of the most recent
        //  output. Java handily initialises it to all zeros
        byte[] buffer = new byte[DICTIONARY_SIZE];

        // Where in the dictionary the next output byte will be
        //  stored. Always kept wrapped to the dictionary size
        int pos = 0;

        // The flag byte is treated as its 8 individual
        //  bits, which tell us if the following 8 codes
        //  are compressed or un-compressed
        int flag;

        while ((flag = src.read()) != -1) {
            // Compare each bit in our flag byte in turn:
            for (int mask = 1; mask < 256; mask <<= 1) {
                if ((flag & mask) != 0) {
                    // Un-compressed: a single literal byte. It is read
                    //  as an unsigned int so the end of stream marker
                    //  (-1) can be told apart from a real 0xFF
                    int literal = src.read();
                    if (literal != -1) {
                        byte value = fromInt(literal);
                        // Save the byte into the dictionary
                        buffer[pos] = value;
                        pos = (pos + 1) & DICTIONARY_MASK;
                        // And output the byte
                        res.write(value);
                    }
                    // On end of stream we just carry on; every later
                    //  read also hits the end, so nothing more is output
                } else {
                    // Compressed: the position and length are held
                    //  over the next 2 bytes
                    int low = src.read();
                    int high = src.read();
                    if (low == -1 || high == -1) {
                        // Ran out of data, give up on this flag byte
                        break;
                    }

                    // The length is the bottom 4 bits of the second
                    //  byte. The position is 12 bits: the top 4 bits
                    //  of the second byte followed by the first byte
                    int len = (high & 0x0F) + MIN_RUN_LENGTH;
                    int stored = ((high & 0xF0) << 4) + low;

                    // Apply the initial shift. Anything that goes past
                    //  the end of the dictionary wraps around when it
                    //  is masked below
                    int pntr = stored + POSITION_SHIFT;

                    // Copy a byte at a time, so that a run which
                    //  overlaps the bytes it is producing picks up
                    //  the freshly written values
                    for (int i = 0; i < len; i++) {
                        byte value = buffer[(pntr + i) & DICTIONARY_MASK];
                        buffer[(pos + i) & DICTIONARY_MASK] = value;
                        res.write(value);
                    }

                    // Record how far along the dictionary we have moved
                    pos = (pos + len) & DICTIONARY_MASK;
                }
            }
        }
    }
}
|
@ -1,80 +0,0 @@
|
|||||||
/* ====================================================================
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; version 3 of the License.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
==================================================================== */
|
|
||||||
package org.apache.poi.hdgf;
|
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
|
|
||||||
/**
 * A decoder for the crazy LZW implementation used
 *  in Visio.
 * This is a port of vsd_inflate.c from vsdump
 * (http://www.gnome.ru/projects/vsdump_en.html)
 *
 * The input is a series of groups, each made up of one flag byte
 *  followed by up to 8 items. Each bit of the flag byte (lowest
 *  first) says whether the matching item is a single literal byte
 *  (bit set), or a 2 byte reference (bit clear) to bytes held in a
 *  4096 byte circular buffer of recent output.
 */
public class LZW4HDGF {
    /** Size of the circular buffer; positions are 12 bits wide */
    private static final int BUFFER_SIZE = 4096;
    /** Mask used to wrap any position back into the buffer */
    private static final int BUFFER_MASK = BUFFER_SIZE - 1;

    /**
     * Turns an integer into a java byte, handling the wrapping.
     *
     * @param b the value to convert, normally between 0 and 255
     * @return the signed java byte with the same low 8 bits
     */
    public byte fromInt(int b) {
        // A narrowing cast keeps the low 8 bits
        return (byte) b;
    }

    /**
     * Decompresses the given input stream, returning the array of
     *  bytes of the decompressed input.
     * Running out of input part way through is not reported as an
     *  error; whatever was decoded up to that point is returned.
     *
     * @param src the stream to read the compressed data from
     * @return the decompressed bytes
     * @throws IOException if reading from the stream fails
     */
    public byte[] decode(InputStream src) throws IOException {
        ByteArrayOutputStream res = new ByteArrayOutputStream();

        // Circular buffer of recent output; Java zero fills it
        byte[] buffer = new byte[BUFFER_SIZE];
        // Where in the buffer the next output byte goes (kept wrapped)
        int pos = 0;
        int flag;

        while ((flag = src.read()) != -1) {
            for (int mask = 1; mask < 0x100; mask <<= 1) {
                if ((flag & mask) != 0) {
                    // Literal byte: remember it, and output it
                    int literal = src.read();
                    if (literal != -1) {
                        byte value = fromInt(literal);
                        buffer[pos] = value;
                        pos = (pos + 1) & BUFFER_MASK;
                        res.write(value);
                    }
                } else {
                    // Reference: position and length over 2 bytes
                    int addr1 = src.read();
                    if (addr1 == -1) {
                        break;
                    }
                    int addr2 = src.read();
                    if (addr2 == -1) {
                        break;
                    }

                    // Length is the low 4 bits of the second byte,
                    //  position is its high 4 bits then the first byte
                    int len = (addr2 & 15) + 3;
                    // Stored positions are offset so that 0xFEE (4078)
                    //  means slot 0; going past the end wraps around
                    //  when masked below
                    int pntr = ((addr2 & 240) << 4) + addr1 + 18;

                    // Byte at a time, so overlapping runs see the
                    //  bytes that were just written
                    for (int i = 0; i < len; i++) {
                        byte value = buffer[(pntr + i) & BUFFER_MASK];
                        buffer[(pos + i) & BUFFER_MASK] = value;
                        res.write(value);
                    }

                    pos = (pos + len) & BUFFER_MASK;
                }
            }
        }
        return res.toByteArray();
    }
}
|
|
@ -19,7 +19,7 @@ package org.apache.poi.hdgf.streams;
|
|||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.poi.hdgf.LZW4HDGF;
|
import org.apache.poi.hdgf.HDGFLZW;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A StreamStore where the data on-disk is compressed,
|
* A StreamStore where the data on-disk is compressed,
|
||||||
@ -76,7 +76,7 @@ public class CompressedStreamStore extends StreamStore {
|
|||||||
ByteArrayInputStream bais = new ByteArrayInputStream(data, offset, length);
|
ByteArrayInputStream bais = new ByteArrayInputStream(data, offset, length);
|
||||||
|
|
||||||
// Decompress
|
// Decompress
|
||||||
LZW4HDGF lzw = new LZW4HDGF();
|
HDGFLZW lzw = new HDGFLZW();
|
||||||
byte[] decompressed = lzw.decode(bais);
|
byte[] decompressed = lzw.decode(bais);
|
||||||
|
|
||||||
// Split into header and contents
|
// Split into header and contents
|
||||||
|
@ -20,7 +20,7 @@ import java.io.ByteArrayInputStream;
|
|||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
public class TestLZW4HDGF extends TestCase {
|
public class TestHDGFLZW extends TestCase {
|
||||||
public static final byte[] testTrailerComp = new byte[] {
|
public static final byte[] testTrailerComp = new byte[] {
|
||||||
123, -60, 2, -21, -16, 1, 0, 0, -72, -13, -16, 78, -32, -5, 1,
|
123, -60, 2, -21, -16, 1, 0, 0, -72, -13, -16, 78, -32, -5, 1,
|
||||||
0, 3, -21, -16, 10, 5, 4, -21, -16, 21, 9, -21, -16, 103, -21,
|
0, 3, -21, -16, 10, 5, 4, -21, -16, 21, 9, -21, -16, 103, -21,
|
||||||
@ -86,8 +86,8 @@ public class TestLZW4HDGF extends TestCase {
|
|||||||
assertEquals(632, testTrailerDecomp.length);
|
assertEquals(632, testTrailerDecomp.length);
|
||||||
|
|
||||||
// Decode it using our engine
|
// Decode it using our engine
|
||||||
LZW4HDGF lzw2 = new LZW4HDGF();
|
HDGFLZW lzw = new HDGFLZW();
|
||||||
byte[] dec = lzw2.decode(new ByteArrayInputStream(testTrailerComp));
|
byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp));
|
||||||
|
|
||||||
// Check it's of the right size
|
// Check it's of the right size
|
||||||
assertEquals(632, dec.length);
|
assertEquals(632, dec.length);
|
Loading…
Reference in New Issue
Block a user