From a1ed3f51e4e2e8c8c4097046fe5f7225ee604979 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sat, 27 Oct 2007 22:50:41 +0000 Subject: [PATCH] A bit more on HDGF LZW compression, but it's still not quite complete git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@589233 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/org/apache/poi/hdgf/HDGFLZW.java | 200 ++++++++++++++---- 1 file changed, 164 insertions(+), 36 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java index 2b1280837..049a768dc 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java @@ -170,18 +170,27 @@ public void decode(InputStream src, OutputStream res) throws IOException { /** * Performs the Visio compatible streaming LZW compression. - * Works by: - * 1) ??? - * 2) ??? * TODO - Finish */ public void compress(InputStream src, OutputStream res) throws IOException { + Compressor c = new Compressor(); + c.compress(src, res); +} + +/** + * Helper class to handle the Visio compatible + * streaming LZW compression. + * Need our own class to handle keeping track of the + * code buffer, pending bytes to write out etc. + */ +private class Compressor { // We use 12 bit codes: // * 0-255 are real bytes // * 256-4095 are the substring codes // Java handily initialises our buffer / dictionary // to all zeros byte[] dict = new byte[4096]; + // The next block of data to be written out, minus // its mask byte byte[] buffer = new byte[16]; @@ -190,6 +199,11 @@ public void compress(InputStream src, OutputStream res) throws IOException { // are two) int bufferLen = 0; + // The raw length of a code is limited to 4 bits + byte[] rawCode = new byte[16]; + // And how much we're using + int rawCodeLen = 0; + // How far through the input and output streams we are int posInp = 0; int posOut = 0; @@ -199,6 +213,101 @@ public void compress(InputStream src, OutputStream res) throws IOException { // And how many bits we've already set int maskBitsSet = 0; +/** + * Returns the last place that the bytes from rawCode are found + * at in the buffer, or -1 if they can't be found + */ +private int findRawCodeInBuffer() { + // Work our way back from the end + // (Visio always seems to use the last possible code) + for(int i=(buffer.length - rawCodeLen); i>=0; i--) { + boolean matches = true; + for(int j=0; matches && j 0) { + outputCompressed(res); + break; } - // Increment the mask bit count, we've done another code - maskBitsSet++; + + // Try adding this new byte onto rawCode, and + // see if all of that is still found in the + // buffer dictionary or not + rawCode[rawCodeLen] = dataB; + rawCodeLen++; + int rawAt = findRawCodeInBuffer(); - // If we've just done the 8th bit, or reached the end - // of the stream, output our mask and data - if(maskBitsSet == 8 || !going) { - // Output - res.write(new byte[] { fromInt(nextMask) } ); - res.write(buffer, 0, bufferLen); + // If we found it and are now at 16 bytes, + // we need to output our pending code block + if(rawCodeLen == 16 && rawAt > -1) { + outputCompressed(res); + rawCodeLen = 0; + continue; + } + + // If we did find all of rawCode with our new + // byte added on, we can wait to see what happens + // with the next byte + if(rawAt > -1) { + continue; + } + + // If there was something in rawCode before, then we + // need to output that + rawCodeLen--; + if(rawCodeLen > 0) { + // Output the old rawCode + outputCompressed(res); - // Reset things - nextMask = 0; - maskBitsSet = 0; - bufferLen = 0; + // Can this byte start a new rawCode, or does + // it need outputting itself? + rawCode[0] = dataB; + rawCodeLen = 1; + if(findRawCodeInBuffer() > -1) { + // Fits in, wait for next byte + continue; + } else { + // Doesn't fit, output + outputUncompressed(dataB,res); + rawCodeLen = 0; + } + } else { + // Nothing in rawCode before, so this byte + // isn't in the buffer dictionary + // Output it un-compressed + outputUncompressed(dataB,res); } } } +} } \ No newline at end of file