Strip trailing padding from HMEF compressed RTF when decoding
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1081414 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent: 865efc9a8c
commit: a638f0a9ad
@ -31,9 +31,6 @@ import org.apache.poi.util.LittleEndian;
|
|||||||
* Within a {@link HMEFMessage}, the content is often
|
* Within a {@link HMEFMessage}, the content is often
|
||||||
* stored in as RTF, but LZW compressed. This class
|
* stored in as RTF, but LZW compressed. This class
|
||||||
* handles decompressing it for you.
|
* handles decompressing it for you.
|
||||||
*
|
|
||||||
* Note - this doesn't quite decompress the data correctly,
|
|
||||||
* more work and unit testing is required...
|
|
||||||
*/
|
*/
|
||||||
public final class CompressedRTF extends LZWDecompresser {
|
public final class CompressedRTF extends LZWDecompresser {
|
||||||
public static final byte[] COMPRESSED_SIGNATURE =
|
public static final byte[] COMPRESSED_SIGNATURE =
|
||||||
@ -52,6 +49,9 @@ public final class CompressedRTF extends LZWDecompresser {
|
|||||||
"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss " +
|
"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss " +
|
||||||
"\\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" +
|
"\\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" +
|
||||||
"{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
|
"{\\colortbl\\red0\\green0\\blue0\n\r\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
|
||||||
|
|
||||||
|
private int compressedSize;
|
||||||
|
private int decompressedSize;
|
||||||
|
|
||||||
public CompressedRTF() {
|
public CompressedRTF() {
|
||||||
// Out flag has the normal meaning
|
// Out flag has the normal meaning
|
||||||
@ -60,10 +60,18 @@ public final class CompressedRTF extends LZWDecompresser {
|
|||||||
super(true, 2, true);
|
super(true, 2, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decompresses the whole of the compressed RTF
|
||||||
|
* stream, outputting the resulting RTF bytes.
|
||||||
|
* Note - will decompress any padding at the end of
|
||||||
|
* the input, if present, use {@link #getDeCompressedSize()}
|
||||||
|
* if you need to know how much of the result is
|
||||||
|
* real. (Padding may be up to 7 bytes).
|
||||||
|
*/
|
||||||
public void decompress(InputStream src, OutputStream res) throws IOException {
|
public void decompress(InputStream src, OutputStream res) throws IOException {
|
||||||
// Validate the header on the front of the RTF
|
// Validate the header on the front of the RTF
|
||||||
int compressedSize = LittleEndian.readInt(src);
|
compressedSize = LittleEndian.readInt(src);
|
||||||
int uncompressedSize = LittleEndian.readInt(src);
|
decompressedSize = LittleEndian.readInt(src);
|
||||||
int compressionType = LittleEndian.readInt(src);
|
int compressionType = LittleEndian.readInt(src);
|
||||||
int dataCRC = LittleEndian.readInt(src);
|
int dataCRC = LittleEndian.readInt(src);
|
||||||
|
|
||||||
@ -82,6 +90,21 @@ public final class CompressedRTF extends LZWDecompresser {
|
|||||||
// Have it processed
|
// Have it processed
|
||||||
super.decompress(src, res);
|
super.decompress(src, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns how big the compressed version was.
|
||||||
|
*/
|
||||||
|
public int getCompressedSize() {
|
||||||
|
// Return the size less the header
|
||||||
|
return compressedSize - 12;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns how big the decompressed version was.
|
||||||
|
*/
|
||||||
|
public int getDeCompressedSize() {
|
||||||
|
return decompressedSize;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* We use regular dictionary offsets, so no
|
* We use regular dictionary offsets, so no
|
||||||
|
@ -37,9 +37,17 @@ public final class MAPIRtfAttribute extends MAPIAttribute {
|
|||||||
public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException {
|
public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException {
|
||||||
super(property, type, data);
|
super(property, type, data);
|
||||||
|
|
||||||
|
// Decompress it, removing any trailing padding as needed
|
||||||
CompressedRTF rtf = new CompressedRTF();
|
CompressedRTF rtf = new CompressedRTF();
|
||||||
this.decompressed = rtf.decompress(new ByteArrayInputStream(data));
|
byte[] tmp = rtf.decompress(new ByteArrayInputStream(data));
|
||||||
|
if(tmp.length > rtf.getDeCompressedSize()) {
|
||||||
|
this.decompressed = new byte[rtf.getDeCompressedSize()];
|
||||||
|
System.arraycopy(tmp, 0, decompressed, 0, decompressed.length);
|
||||||
|
} else {
|
||||||
|
this.decompressed = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Turn the RTF data into a more useful string
|
||||||
this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length);
|
this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,7 +148,7 @@ public final class TestCompressedRTF extends TestCase {
|
|||||||
* Check that we can correctly decode the whole file
|
* Check that we can correctly decode the whole file
|
||||||
* TODO Fix what looks like a padding issue
|
* TODO Fix what looks like a padding issue
|
||||||
*/
|
*/
|
||||||
public void DISABLEDtestFull() throws Exception {
|
public void testFull() throws Exception {
|
||||||
HMEFMessage msg = new HMEFMessage(
|
HMEFMessage msg = new HMEFMessage(
|
||||||
_samples.openResourceAsStream("quick-winmail.dat")
|
_samples.openResourceAsStream("quick-winmail.dat")
|
||||||
);
|
);
|
||||||
@ -160,11 +160,26 @@ public final class TestCompressedRTF extends TestCase {
|
|||||||
byte[] expected = IOUtils.toByteArray(
|
byte[] expected = IOUtils.toByteArray(
|
||||||
_samples.openResourceAsStream("quick-contents/message.rtf")
|
_samples.openResourceAsStream("quick-contents/message.rtf")
|
||||||
);
|
);
|
||||||
byte[] decomp = rtfAttr.getData();
|
|
||||||
|
CompressedRTF comp = new CompressedRTF();
|
||||||
|
byte[] data = rtfAttr.getRawData();
|
||||||
|
byte[] decomp = comp.decompress(new ByteArrayInputStream(data));
|
||||||
|
|
||||||
|
// Check the length was as expected
|
||||||
|
assertEquals(data.length, comp.getCompressedSize() + 16);
|
||||||
|
assertEquals(expected.length, comp.getDeCompressedSize());
|
||||||
|
|
||||||
|
// Will have been padded though
|
||||||
|
assertEquals(expected.length+2, decomp.length);
|
||||||
|
byte[] tmp = new byte[expected.length];
|
||||||
|
System.arraycopy(decomp, 0, tmp, 0, tmp.length);
|
||||||
|
decomp = tmp;
|
||||||
|
|
||||||
// By byte
|
// By byte
|
||||||
assertEquals(expected.length, decomp.length);
|
assertEquals(expected.length, decomp.length);
|
||||||
assertEquals(expected, decomp);
|
for(int i=0; i<expected.length; i++) {
|
||||||
|
assertEquals(expected[i], decomp[i]);
|
||||||
|
}
|
||||||
|
|
||||||
// By String
|
// By String
|
||||||
String expString = new String(expected, "ASCII");
|
String expString = new String(expected, "ASCII");
|
||||||
|
@ -103,9 +103,8 @@ public final class TestHMEFMessage extends HMEFTest {
|
|||||||
/**
|
/**
|
||||||
* Checks that the compressed RTF message contents
|
* Checks that the compressed RTF message contents
|
||||||
* can be correctly extracted
|
* can be correctly extracted
|
||||||
* TODO Fix what looks like a padding issue
|
|
||||||
*/
|
*/
|
||||||
public void DISABLEDtestMessageContents() throws Exception {
|
public void testMessageContents() throws Exception {
|
||||||
HMEFMessage msg = new HMEFMessage(
|
HMEFMessage msg = new HMEFMessage(
|
||||||
_samples.openResourceAsStream("quick-winmail.dat")
|
_samples.openResourceAsStream("quick-winmail.dat")
|
||||||
);
|
);
|
||||||
|
Loading…
Reference in New Issue
Block a user