Stub out the HDGF compression routine, and add tests for it. Document the first slab of the compressed data in the test, so it's easy to see if we're generating it properly. (The compression support itself has not been implemented yet, though.)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@584534 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2007-10-14 12:42:47 +00:00
parent 786af85cc0
commit 7774616a7d
2 changed files with 187 additions and 8 deletions

View File

@ -40,7 +40,7 @@ public class HDGFLZW {
* the wrapping.
* This is a convenience method
*/
public byte fromInt(int b) {
public static byte fromInt(int b) {
    // A narrowing cast keeps only the low 8 bits, so 0..127 map to
    // themselves and 128..255 wrap around to -128..-1
    return (byte) b;
}
@ -49,11 +49,21 @@ public byte fromInt(int b) {
* and 255 (i.e. handle the unwrapping).
* This is a convenience method
*/
public int fromByte(byte b) {
public static int fromByte(byte b) {
    // Masking strips the sign extension, so -128..-1 come back
    // as 128..255 and non-negative values are left untouched
    return b & 0xFF;
}
/**
 * Convenience wrapper around the streaming compressor: reads the
 * whole of the supplied stream, compresses it, and hands the
 * compressed form back as a byte array.
 */
public byte[] compress(InputStream src) throws IOException {
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    this.compress(src, compressed);
    return compressed.toByteArray();
}
/**
* Decompresses the given input stream, returning the array of bytes
* of the decompressed input.
@ -135,7 +145,7 @@ public void decode(InputStream src, OutputStream res) throws IOException {
// length is the last 4 bits)
len = (dataIPt2 & 15) + 3;
pntr = (dataIPt2 & 240)*16 + dataIPt1;
// If the pointer happens to be passed the end
// of our buffer, then wrap around
if(pntr > 4078) {
@ -158,4 +168,88 @@ public void decode(InputStream src, OutputStream res) throws IOException {
}
}
/**
 * Performs the Visio compatible streaming LZW compression.
 * <p>
 * This is still a stub: no back-references into the dictionary are
 * searched for yet, so every input byte is written out as an
 * uncompressed (literal) code. The output is a series of groups of
 * up to 8 codes, each group preceded by a mask byte in which a set
 * bit marks the corresponding code as a literal byte.
 * <p>
 * TODO - Finish: find repeated runs of 3+ bytes in the dictionary
 * and emit them as two byte compressed codes (mask bit clear).
 *
 * @param src the stream to read the uncompressed data from
 * @param res the stream to write the compressed data to
 * @throws IOException if reading from src or writing to res fails
 */
public void compress(InputStream src, OutputStream res) throws IOException {
    // Sliding dictionary of the last 4096 bytes output.
    // Java handily initialises it to all zeros.
    // It is only written to for now; the (not yet implemented)
    // substring search will need to read from it.
    byte[] dict = new byte[4096];

    // The next group of codes to be written out, minus its mask
    // byte. A group is at most 8 codes, and a code is at most
    // 2 bytes (1 for un-compressed codes, 2 for compressed ones)
    byte[] buffer = new byte[16];
    // And how much of it is in use
    int bufferLen = 0;

    // How far through the output we are (position in dictionary)
    int posOut = 0;

    // What the next mask byte to output will be
    int nextMask = 0;
    // And how many codes (= mask bits) are in the current group
    int maskBitsSet = 0;

    // An unsigned byte read from the stream, or -1 at the end.
    // The end marker must never reach the output, so stop
    // before doing any processing on it.
    int dataI;
    while ((dataI = src.read()) != -1) {
        // Same two's complement wrapping as fromInt() gives
        byte dataB = (byte) dataI;

        // TODO - decide between a literal and a compressed code.
        // For now everything is a literal, so set our mask bit
        nextMask |= (1 << maskBitsSet);

        // And add us to the buffer + dictionary
        buffer[bufferLen] = dataB;
        bufferLen++;
        dict[(posOut & 4095)] = dataB;
        posOut++;

        // We've done another code
        maskBitsSet++;

        // If we've just done the 8th code, output the group
        if (maskBitsSet == 8) {
            res.write(nextMask);
            res.write(buffer, 0, bufferLen);

            // Reset things
            nextMask = 0;
            maskBitsSet = 0;
            bufferLen = 0;
        }
    }

    // Output any final, partly filled group. Nothing at all is
    // written for empty input, or when the input ended exactly
    // on a group boundary.
    if (maskBitsSet > 0) {
        res.write(nextMask);
        res.write(buffer, 0, bufferLen);
    }
}
}

View File

@ -22,10 +22,34 @@ import junit.framework.TestCase;
public class TestHDGFLZW extends TestCase {
public static final byte[] testTrailerComp = new byte[] {
123, -60, 2, -21, -16, 1, 0, 0, -72, -13, -16, 78, -32, -5, 1,
0, 3, -21, -16, 10, 5, 4, -21, -16, 21, 9, -21, -16, 103, -21,
-16, 34, -36, -1, 52, 15, 70, 15, 120, 88, 15, -7, -2, -28, -9,
-123, 21, 0, 44, -122, 1, -4, 104, 15, -24, -13, 40, -98, 32,
123, // *mask bit*
-60, 2,
-21, -16, // 3 @ 4093
1, 0, 0, -72,
-13, -16, // 3 @ 5
78, // *mask bit*
-32, -5, // 14 @ 4082
1, 0, 3,
-21, -16, // 3 @ 4093
10, 5, // 8 @ 28
4,
-21, -16, // 3 @ 4093
21, // *mask bit*
9,
-21, -16, // 3 @ 4093
103, -21, -16, 34,
-36, -1, // 18 @ 4078
52, 15, // 18 @ 70
70, 15, // 18 @ 88
120, // *mask bit*
88, 15, // 18 @ 106
-7, -2, // 17 @ 11
-28, -9, // 10 @ 4086
-123, 21, 0, 44,
-122, 1, // 4 @ 152
-4, // *mask bit*
104, 15, // 18 @ 122
-24, -13, 40, -98, 32,
78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
-85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
-34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
@ -81,6 +105,33 @@ public class TestHDGFLZW extends TestCase {
0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
};
public void testFromToInt() throws Exception {
    // Each signed byte value, paired with the unsigned int it
    // should convert to (and back from)
    byte[] signedValues = { -1, 11, 0, 127, -128 };
    int[] unsignedValues = { 255, 11, 0, 127, 128 };

    for (int i = 0; i < signedValues.length; i++) {
        byte asByte = signedValues[i];
        int asInt = unsignedValues[i];

        // byte -> int
        assertEquals(asInt, HDGFLZW.fromByte(asByte));
        // byte -> int -> byte round trip
        assertEquals((int) asByte, HDGFLZW.fromInt(HDGFLZW.fromByte(asByte)));
        // int -> byte
        assertEquals((int) asByte, HDGFLZW.fromInt(asInt));
    }
}
public void testCounts() throws Exception {
assertEquals(339, testTrailerComp.length);
assertEquals(632, testTrailerDecomp.length);
@ -91,11 +142,45 @@ public class TestHDGFLZW extends TestCase {
// Check it's of the right size
assertEquals(632, dec.length);
/*
// Encode it again using our engine
byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
// Now check it matches
// Check it's of the right size
assertEquals(339, comp.length);
*/
}
/**
 * Decodes the known compressed trailer and checks that every byte
 * of the result matches the known decompressed form.
 */
public void testDecompress() throws Exception {
    assertEquals(339, testTrailerComp.length);
    assertEquals(632, testTrailerDecomp.length);

    // Decode it using our engine
    HDGFLZW lzw = new HDGFLZW();
    byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp));

    // Now check it's the right data. A mismatch has to fail the
    // test, rather than just being logged, or bad output from the
    // decoder would go unnoticed
    assertEquals(632, dec.length);
    for (int i = 0; i < dec.length; i++) {
        assertEquals("Decompressed byte " + i + " differs",
                testTrailerDecomp[i], dec[i]);
    }
}
/**
 * Compresses the known decompressed trailer and checks that the
 * result matches the known compressed form byte for byte.
 * Disabled (name lacks the "test" prefix) until the compression
 * routine is actually implemented.
 */
public void DISABLEDtestCompress() throws Exception {
    assertEquals(339, testTrailerComp.length);
    assertEquals(632, testTrailerDecomp.length);

    // Compress it using our engine
    HDGFLZW lzw = new HDGFLZW();
    byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));

    // Now check it's the right data. A mismatch has to fail the
    // test, rather than just being logged, once this is enabled
    assertEquals(339, comp.length);
    for (int i = 0; i < comp.length; i++) {
        assertEquals("Compressed byte " + i + " differs",
                testTrailerComp[i], comp[i]);
    }
}
}