Replace the HDGF LZW engine with a fully documented, ASL licenced version. (Doesn't do compression yet, but is a much better start for that)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@584414 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6b70b7cfad
commit
786af85cc0
161
src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
Normal file
161
src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
Normal file
@ -0,0 +1,161 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hdgf;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
 * A decoder for the crazy LZW implementation used
 *  in Visio.
 * According to VSDump, "it's a slightly perverted version of LZW
 *  compression, with inverted meaning of flag byte and 0xFEE as an
 *  'initial shift'". It uses 12 bit codes
 * (http://www.gnome.ru/projects/vsdump_en.html)
 *
 * Two good resources on LZW are:
 *  http://en.wikipedia.org/wiki/LZW
 *  http://marknelson.us/1989/10/01/lzw-data-compression/
 */
public class HDGFLZW {
    /** Number of bytes in the dictionary; 12 bit pointers address exactly this many. */
    private static final int DICTIONARY_SIZE = 4096;
    /** Mask that wraps any position back into the dictionary. */
    private static final int DICTIONARY_MASK = DICTIONARY_SIZE - 1;
    /**
     * Amount added to every stored pointer before it is used. Modulo the
     *  dictionary size this is the same as subtracting 0xFEE (4078).
     */
    private static final int POINTER_SHIFT = 18;
    /** The stored 4 bit length is this much smaller than the real run length. */
    private static final int MIN_RUN_LENGTH = 3;

    /**
     * Given an integer, turn it into a java byte, handling
     *  the wrapping.
     * This is a convenience method
     *
     * @param b the value to convert, normally between 0 and 255
     * @return the byte holding the low 8 bits of the value
     */
    public byte fromInt(int b) {
        // The narrowing cast keeps the low 8 bits, which is exactly
        //  the 0..255 -> -128..127 wrapping we're after
        return (byte)b;
    }

    /**
     * Given a java byte, turn it into an integer between 0
     *  and 255 (i.e. handle the unwrapping).
     * This is a convenience method
     *
     * @param b the (signed) java byte
     * @return the same 8 bits, as a value between 0 and 255
     */
    public int fromByte(byte b) {
        return b & 0xFF;
    }

    /**
     * Decompresses the given input stream, returning the array of bytes
     *  of the decompressed input.
     *
     * @param src the stream of compressed data, read until it is exhausted
     * @return the decompressed bytes
     * @throws IOException if reading from the stream fails
     */
    public byte[] decode(InputStream src) throws IOException {
        ByteArrayOutputStream res = new ByteArrayOutputStream();
        decode(src, res);
        return res.toByteArray();
    }

    /**
     * Perform a streaming decompression of the input.
     * Works by:
     * 1) Reading a flag byte, the 8 bits of which tell you if the
     *     following 8 codes are compressed or un-compressed
     * 2) Consider the 8 bits in turn, lowest bit first
     * 3) If the bit is set, the next code is un-compressed, so
     *     add it to the dictionary and output it
     * 4) If the bit isn't set, then read in the length and start
     *     position in the dictionary, and output the bytes there
     * 5) Loop until we've done all 8 bits, then read in the next
     *     flag byte
     *
     * Decoding finishes quietly as soon as the input runs out, even
     *  if that is part way through a group of 8 codes. Anything decoded
     *  up to that point has already been written to the output.
     *
     * @param src the stream of compressed data, read until it is exhausted
     * @param res the stream that the decompressed bytes are written to
     * @throws IOException if reading from src or writing to res fails
     */
    public void decode(InputStream src, OutputStream res) throws IOException {
        // Java handily initialises our buffer / dictionary
        //  to all zeros
        byte[] buffer = new byte[DICTIONARY_SIZE];

        // Where in the dictionary the next output byte is stored.
        // Always kept wrapped to within the dictionary
        int pos = 0;

        // The flag byte is treated as its 8 individual
        //  bits, which tell us if the following 8 codes
        //  are compressed or un-compressed
        int flag;

        while( (flag = src.read()) != -1 ) {
            // Compare each bit in our flag byte in turn:
            for(int mask = 1; mask < 256; mask <<= 1) {
                // Is this a new code (un-compressed), or
                //  the use of existing codes (compressed)?
                if( (flag & mask) != 0 ) {
                    // Retrieve the un-compressed code
                    int dataI = src.read();
                    if(dataI == -1) {
                        // Out of data, so nothing more to decode
                        return;
                    }

                    // Save the byte into the dictionary
                    buffer[pos] = (byte)dataI;
                    pos = (pos + 1) & DICTIONARY_MASK;
                    // And output the byte
                    res.write(dataI);
                } else {
                    // We have a compressed sequence
                    // Grab the next 16 bits of data
                    int dataIPt1 = src.read();
                    if(dataIPt1 == -1) {
                        return;
                    }
                    int dataIPt2 = src.read();
                    if(dataIPt2 == -1) {
                        return;
                    }

                    // Build up how long the code sequence is, and
                    //  what position of the code to start at
                    // (The low 4 bits of the second byte are the
                    //  length. Its high 4 bits, placed above the
                    //  whole of the first byte, are the position)
                    int len = (dataIPt2 & 15) + MIN_RUN_LENGTH;
                    int pntr = ((dataIPt2 & 240) << 4) + dataIPt1;

                    // Apply the shift to the pointer, wrapping around
                    //  if that takes us past the end of our buffer
                    pntr = (pntr + POINTER_SHIFT) & DICTIONARY_MASK;

                    // Loop over the codes, outputting what they correspond to.
                    // This must go a byte at a time, since the source
                    //  may overlap with the bytes we are adding
                    for(int i = 0; i < len; i++) {
                        byte dataB = buffer[(pntr + i) & DICTIONARY_MASK];
                        buffer[(pos + i) & DICTIONARY_MASK] = dataB;
                        // write(int) only uses the low 8 bits, so
                        //  the sign of the byte doesn't matter
                        res.write(dataB);
                    }

                    // Record how far along the stream we have moved
                    pos = (pos + len) & DICTIONARY_MASK;
                }
            }
        }
    }
}
|
@ -1,80 +0,0 @@
|
||||
/* ====================================================================
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; version 3 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hdgf;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
 * A decoder for the crazy LZW implementation used
 *  in Visio.
 * This is a port of vsd_inflate.c from vsdump
 * (http://www.gnome.ru/projects/vsdump_en.html)
 */
public class LZW4HDGF {
    /** Mask that wraps any position back into the 4096 byte dictionary. */
    private static final int DICTIONARY_MASK = 4095;

    /**
     * Turns an integer into a java byte, keeping only its
     *  low 8 bits (so 0..255 wraps to -128..127).
     *
     * @param b the value to convert, normally between 0 and 255
     * @return the byte holding the low 8 bits of the value
     */
    public byte fromInt(int b) {
        return (byte)b;
    }

    /**
     * Decompresses the given input stream, returning the array of bytes
     *  of the decompressed input.
     * Each flag byte describes the next 8 codes, lowest bit first. A set
     *  bit means one literal byte follows. A clear bit means two bytes
     *  follow, holding a dictionary position and a length to copy from it.
     * Decoding finishes quietly as soon as the input runs out.
     *
     * @param src the stream of compressed data, read until it is exhausted
     * @return the decompressed bytes
     * @throws IOException if reading from the stream fails
     */
    public byte[] decode(InputStream src) throws IOException {
        ByteArrayOutputStream res = new ByteArrayOutputStream();
        // A new array is already all zeros, which is the
        //  starting state of the dictionary
        byte[] buffer = new byte[DICTIONARY_MASK + 1];
        // Where in the dictionary the next output byte goes
        int pos = 0;
        int flag;

        while( (flag = src.read()) != -1 ) {
            for(int mask = 1; mask < 0x100; mask <<= 1) {
                if( (flag & mask) != 0 ) {
                    // A literal byte: remember it, and output it
                    int literal = src.read();
                    if(literal == -1) {
                        return res.toByteArray();
                    }
                    buffer[pos] = (byte)literal;
                    pos = (pos + 1) & DICTIONARY_MASK;
                    res.write(literal);
                } else {
                    // A reference back into the dictionary
                    int addr1 = src.read();
                    if(addr1 == -1) {
                        return res.toByteArray();
                    }
                    int addr2 = src.read();
                    if(addr2 == -1) {
                        return res.toByteArray();
                    }

                    // Low 4 bits of the second byte are the length (less 3),
                    //  its high 4 bits sit above the first byte as the position
                    int len = (addr2 & 15) + 3;
                    int pntr = ((addr2 & 240) << 4) + addr1;

                    // Shift the position by 18, wrapping around the dictionary
                    pntr = (pntr + 18) & DICTIONARY_MASK;

                    // Copy a byte at a time, as the source may overlap
                    //  with the bytes being added
                    for(int i = 0; i < len; i++) {
                        byte data = buffer[(pntr + i) & DICTIONARY_MASK];
                        buffer[(pos + i) & DICTIONARY_MASK] = data;
                        // write(int) only uses the low 8 bits
                        res.write(data);
                    }

                    pos = (pos + len) & DICTIONARY_MASK;
                }
            }
        }
        return res.toByteArray();
    }
}
|
@ -19,7 +19,7 @@ package org.apache.poi.hdgf.streams;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.hdgf.LZW4HDGF;
|
||||
import org.apache.poi.hdgf.HDGFLZW;
|
||||
|
||||
/**
|
||||
* A StreamStore where the data on-disk is compressed,
|
||||
@ -76,7 +76,7 @@ public class CompressedStreamStore extends StreamStore {
|
||||
ByteArrayInputStream bais = new ByteArrayInputStream(data, offset, length);
|
||||
|
||||
// Decompress
|
||||
LZW4HDGF lzw = new LZW4HDGF();
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] decompressed = lzw.decode(bais);
|
||||
|
||||
// Split into header and contents
|
||||
|
@ -20,7 +20,7 @@ import java.io.ByteArrayInputStream;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
public class TestLZW4HDGF extends TestCase {
|
||||
public class TestHDGFLZW extends TestCase {
|
||||
public static final byte[] testTrailerComp = new byte[] {
|
||||
123, -60, 2, -21, -16, 1, 0, 0, -72, -13, -16, 78, -32, -5, 1,
|
||||
0, 3, -21, -16, 10, 5, 4, -21, -16, 21, 9, -21, -16, 103, -21,
|
||||
@ -86,8 +86,8 @@ public class TestLZW4HDGF extends TestCase {
|
||||
assertEquals(632, testTrailerDecomp.length);
|
||||
|
||||
// Decode it using our engine
|
||||
LZW4HDGF lzw2 = new LZW4HDGF();
|
||||
byte[] dec = lzw2.decode(new ByteArrayInputStream(testTrailerComp));
|
||||
HDGFLZW lzw = new HDGFLZW();
|
||||
byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp));
|
||||
|
||||
// Check it's of the right size
|
||||
assertEquals(632, dec.length);
|
Loading…
Reference in New Issue
Block a user