Two more differences between the LZW in HDGF and HMEF:
* Little Endian vs Big Endian storage of the code position
* Initial dictionary position is the end of the pre-fill, if there is one, rather than always being position 0

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1078300 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2c4134e89b
commit
0df8c11c4b
@ -41,17 +41,30 @@ public abstract class LZWDecompresser {
|
|||||||
* to get the real code length? Normally 2 or 3
|
* to get the real code length? Normally 2 or 3
|
||||||
*/
|
*/
|
||||||
private final int codeLengthIncrease;
|
private final int codeLengthIncrease;
|
||||||
|
/**
|
||||||
|
* Does the 12 bits of the position get stored in
|
||||||
|
* Little Endian or Big Endian form?
|
||||||
|
* This controls whether a pos+length of 0x12 0x34
|
||||||
|
* becomes a position of 0x123 or 0x312
|
||||||
|
*/
|
||||||
|
private final boolean positionIsBigEndian;
|
||||||
|
|
||||||
protected LZWDecompresser(boolean maskMeansCompressed, int codeLengthIncrease) {
|
protected LZWDecompresser(boolean maskMeansCompressed,
|
||||||
|
int codeLengthIncrease, boolean positionIsBigEndian) {
|
||||||
this.maskMeansCompressed = maskMeansCompressed;
|
this.maskMeansCompressed = maskMeansCompressed;
|
||||||
this.codeLengthIncrease = codeLengthIncrease;
|
this.codeLengthIncrease = codeLengthIncrease;
|
||||||
|
this.positionIsBigEndian = positionIsBigEndian;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Populates the dictionary. May not need
|
* Populates the dictionary, and returns where in it
|
||||||
* to do anything if all zeros is fine.
|
* to begin writing new codes.
|
||||||
|
* Generally, if the dictionary is pre-populated, then new
|
||||||
|
* codes should be placed at the end of that block.
|
||||||
|
* Equally, if the dictionary is left with all zeros, then
|
||||||
|
* usually the new codes can go in at the start.
|
||||||
*/
|
*/
|
||||||
protected abstract void populateDictionary(byte[] dict);
|
protected abstract int populateDictionary(byte[] dict);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adjusts the position offset if needed when looking
|
* Adjusts the position offset if needed when looking
|
||||||
@ -83,17 +96,10 @@ public abstract class LZWDecompresser {
|
|||||||
* flag byte
|
* flag byte
|
||||||
*/
|
*/
|
||||||
public void decompress(InputStream src, OutputStream res) throws IOException {
|
public void decompress(InputStream src, OutputStream res) throws IOException {
|
||||||
// We use 12 bit codes:
|
|
||||||
// * 0-255 are real bytes
|
|
||||||
// * 256-4095 are the substring codes
|
|
||||||
// Java handily initialises our buffer / dictionary
|
|
||||||
// to all zeros
|
|
||||||
byte[] buffer = new byte[4096];
|
|
||||||
populateDictionary(buffer);
|
|
||||||
|
|
||||||
// How far through the output we've got
|
// How far through the output we've got
|
||||||
// (This is normally used &4095, so it nicely wraps)
|
// (This is normally used &4095, so it nicely wraps)
|
||||||
int pos = 0;
|
// The initial value is set when populating the dictionary
|
||||||
|
int pos;
|
||||||
// The flag byte is treated as its 8 individual
|
// The flag byte is treated as its 8 individual
|
||||||
// bits, which tell us if the following 8 codes
|
// bits, which tell us if the following 8 codes
|
||||||
// are compressed or un-compressed
|
// are compressed or un-compressed
|
||||||
@ -102,10 +108,18 @@ public abstract class LZWDecompresser {
|
|||||||
// processing each bit of the flag byte in turn
|
// processing each bit of the flag byte in turn
|
||||||
int mask;
|
int mask;
|
||||||
|
|
||||||
|
// We use 12 bit codes:
|
||||||
|
// * 0-255 are real bytes
|
||||||
|
// * 256-4095 are the substring codes
|
||||||
|
// Java handily initialises our buffer / dictionary
|
||||||
|
// to all zeros
|
||||||
|
byte[] buffer = new byte[4096];
|
||||||
|
pos = populateDictionary(buffer);
|
||||||
|
|
||||||
// These are bytes as looked up in the dictionary
|
// These are bytes as looked up in the dictionary
|
||||||
// It needs to be signed, as it'll get passed on to
|
// It needs to be signed, as it'll get passed on to
|
||||||
// the output stream
|
// the output stream
|
||||||
byte[] dataB = new byte[19];
|
byte[] dataB = new byte[16+codeLengthIncrease];
|
||||||
// This is an unsigned byte read from the stream
|
// This is an unsigned byte read from the stream
|
||||||
// It needs to be unsigned, so that bit stuff works
|
// It needs to be unsigned, so that bit stuff works
|
||||||
int dataI;
|
int dataI;
|
||||||
@ -121,7 +135,7 @@ public abstract class LZWDecompresser {
|
|||||||
// Is this a new code (un-compressed), or
|
// Is this a new code (un-compressed), or
|
||||||
// the use of existing codes (compressed)?
|
// the use of existing codes (compressed)?
|
||||||
boolean isMaskSet = (flag & mask) > 0;
|
boolean isMaskSet = (flag & mask) > 0;
|
||||||
if( isMaskSet && !maskMeansCompressed ) {
|
if( isMaskSet ^ maskMeansCompressed ) {
|
||||||
// Retrieve the un-compressed code
|
// Retrieve the un-compressed code
|
||||||
if( (dataI = src.read()) != -1) {
|
if( (dataI = src.read()) != -1) {
|
||||||
// Save the byte into the dictionary
|
// Save the byte into the dictionary
|
||||||
@ -139,10 +153,14 @@ public abstract class LZWDecompresser {
|
|||||||
|
|
||||||
// Build up how long the code sequence is, and
|
// Build up how long the code sequence is, and
|
||||||
// what position of the code to start at
|
// what position of the code to start at
|
||||||
// (The position is the first 12 bits, the
|
// (The position is usually the first 12 bits,
|
||||||
// length is the last 4 bits)
|
// and the length is usually the last 4 bits)
|
||||||
len = (dataIPt2 & 15) + codeLengthIncrease;
|
len = (dataIPt2 & 15) + codeLengthIncrease;
|
||||||
pntr = (dataIPt2 & 240)*16 + dataIPt1;
|
if(positionIsBigEndian) {
|
||||||
|
pntr = (dataIPt1<<4) + (dataIPt2>>4);
|
||||||
|
} else {
|
||||||
|
pntr = dataIPt1 + ((dataIPt2&0xF0)<<4);
|
||||||
|
}
|
||||||
|
|
||||||
// Adjust the pointer as needed
|
// Adjust the pointer as needed
|
||||||
pntr = adjustDictionaryOffset(pntr);
|
pntr = adjustDictionaryOffset(pntr);
|
||||||
|
@ -37,8 +37,10 @@ import org.apache.poi.util.LZWDecompresser;
|
|||||||
*/
|
*/
|
||||||
public class HDGFLZW extends LZWDecompresser {
|
public class HDGFLZW extends LZWDecompresser {
|
||||||
public HDGFLZW() {
|
public HDGFLZW() {
|
||||||
// We're the wrong way round!
|
// Our flag is the wrong way round!
|
||||||
super(false, 3);
|
// Length wise, we're 3 longer than we say, so the max len is 18 (15 + 3)
|
||||||
|
// Endian wise, we're little endian, so 0x1234 is pos 0x312
|
||||||
|
super(false, 3, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -68,7 +70,8 @@ public class HDGFLZW extends LZWDecompresser {
|
|||||||
* We want an empty dictionary, so do nothing
|
* We want an empty dictionary, so do nothing
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected void populateDictionary(byte[] dict) {
|
protected int populateDictionary(byte[] dict) {
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -54,7 +54,10 @@ public final class CompressedRTF extends LZWDecompresser {
|
|||||||
"{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
|
"{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
|
||||||
|
|
||||||
public CompressedRTF() {
|
public CompressedRTF() {
|
||||||
super(true, 2);
|
// Our flag has the normal meaning
|
||||||
|
// Length wise, we're 2 longer than we say, so the max len is 17 (15 + 2)
|
||||||
|
// Endian wise, we're big endian, so 0x1234 is pos 0x123
|
||||||
|
super(true, 2, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void decompress(InputStream src, OutputStream res) throws IOException {
|
public void decompress(InputStream src, OutputStream res) throws IOException {
|
||||||
@ -80,17 +83,24 @@ public final class CompressedRTF extends LZWDecompresser {
|
|||||||
super.decompress(src, res);
|
super.decompress(src, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We use regular dictionary offsets, so no
|
||||||
|
* need to change anything
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected int adjustDictionaryOffset(int offset) {
|
protected int adjustDictionaryOffset(int offset) {
|
||||||
// TODO Do we need to change anything?
|
return offset;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void populateDictionary(byte[] dict) {
|
protected int populateDictionary(byte[] dict) {
|
||||||
try {
|
try {
|
||||||
|
// Copy in the RTF constants
|
||||||
byte[] preload = LZW_RTF_PRELOAD.getBytes("US-ASCII");
|
byte[] preload = LZW_RTF_PRELOAD.getBytes("US-ASCII");
|
||||||
System.arraycopy(preload, 0, dict, 0, preload.length);
|
System.arraycopy(preload, 0, dict, 0, preload.length);
|
||||||
|
|
||||||
|
// Start adding new codes after the constants
|
||||||
|
return preload.length;
|
||||||
} catch(UnsupportedEncodingException e) {
|
} catch(UnsupportedEncodingException e) {
|
||||||
throw new RuntimeException("Your JVM is broken as it doesn't support US ASCII");
|
throw new RuntimeException("Your JVM is broken as it doesn't support US ASCII");
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,7 @@ public final class TestCompressedRTF extends TestCase {
|
|||||||
* Check that we can decode the first 8 codes
|
* Check that we can decode the first 8 codes
|
||||||
* (1 flag byte + 8 codes)
|
* (1 flag byte + 8 codes)
|
||||||
*/
|
*/
|
||||||
public void DISABLEDtestFirstBlock() throws Exception {
|
public void testFirstBlock() throws Exception {
|
||||||
HMEFMessage msg = new HMEFMessage(
|
HMEFMessage msg = new HMEFMessage(
|
||||||
_samples.openResourceAsStream("quick-winmail.dat")
|
_samples.openResourceAsStream("quick-winmail.dat")
|
||||||
);
|
);
|
||||||
@ -112,7 +112,6 @@ public final class TestCompressedRTF extends TestCase {
|
|||||||
String decompStr = new String(decomp, "ASCII");
|
String decompStr = new String(decomp, "ASCII");
|
||||||
|
|
||||||
// Test
|
// Test
|
||||||
System.err.println(decompStr);
|
|
||||||
assertEquals(block1.length(), decomp.length);
|
assertEquals(block1.length(), decomp.length);
|
||||||
assertEquals(block1, decompStr);
|
assertEquals(block1, decompStr);
|
||||||
}
|
}
|
||||||
@ -121,7 +120,7 @@ System.err.println(decompStr);
|
|||||||
* Check that we can decode the first 16 codes
|
* Check that we can decode the first 16 codes
|
||||||
* (flag + 8 codes, flag + 8 codes)
|
* (flag + 8 codes, flag + 8 codes)
|
||||||
*/
|
*/
|
||||||
public void DISABLEDtestFirstTwoBlocks() throws Exception {
|
public void testFirstTwoBlocks() throws Exception {
|
||||||
HMEFMessage msg = new HMEFMessage(
|
HMEFMessage msg = new HMEFMessage(
|
||||||
_samples.openResourceAsStream("quick-winmail.dat")
|
_samples.openResourceAsStream("quick-winmail.dat")
|
||||||
);
|
);
|
||||||
@ -140,7 +139,6 @@ System.err.println(decompStr);
|
|||||||
String decompStr = new String(decomp, "ASCII");
|
String decompStr = new String(decomp, "ASCII");
|
||||||
|
|
||||||
// Test
|
// Test
|
||||||
System.err.println(decompStr);
|
|
||||||
assertEquals(block2.length(), decomp.length);
|
assertEquals(block2.length(), decomp.length);
|
||||||
assertEquals(block2, decompStr);
|
assertEquals(block2, decompStr);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user