Add a TNEF extractor class, which can be run from the command line to extract the message body and attachments

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1076607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2011-03-03 13:01:07 +00:00
parent aa1963cd3b
commit efe5a63429
3 changed files with 166 additions and 36 deletions

View File

@ -31,17 +31,35 @@ import org.apache.poi.util.StringUtil;
* to a {@link HMEFMessage} or one of its {@link Attachment}s.
*/
public final class MAPIRtfAttribute extends MAPIAttribute {
private final byte[] decompressed;
private final String data;
/**
 * Builds the attribute from compressed RTF, decompressing it once and
 * keeping both the decompressed bytes and their string form.
 *
 * @param property the MAPI property this attribute holds
 * @param type the type value, passed through to the parent class
 * @param data the compressed RTF bytes, passed through to the parent
 *  class unchanged and also fed to {@link CompressedRTF}
 * @throws IOException if the decompression fails
 */
public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException {
   super(property, type, data);

   // Decompress exactly once; both final fields are derived from the
   //  same decompressed buffer
   CompressedRTF rtf = new CompressedRTF();
   this.decompressed = rtf.decompress(new ByteArrayInputStream(data));
   this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length);
}
/**
 * Gives access to the RTF in its original, still compressed form,
 * as held by the parent class.
 */
public byte[] getRawData() {
   final byte[] compressed = super.getData();
   return compressed;
}
/**
 * Gives access to the RTF bytes in their raw, uncompressed form.
 */
public byte[] getData() {
   return this.decompressed;
}
/**
 * Gives the uncompressed RTF, converted to a string.
 */
public String getDataString() {
   return this.data;
}

View File

@ -0,0 +1,109 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hmef.extractor;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.hmef.Attachment;
import org.apache.poi.hmef.HMEFMessage;
import org.apache.poi.hmef.attribute.MAPIRtfAttribute;
import org.apache.poi.hsmf.datatypes.MAPIProperty;
/**
 * A utility for extracting the message body, and all attachments,
 *  from a HMEF/TNEF/winmail.dat file.
 * Can be run from the command line via {@link #main(String[])}, or
 *  used programmatically on an already opened {@link HMEFMessage}.
 */
public final class HMEFContentsExtractor {
   /**
    * Command line entry point. Expects the file to extract from, followed
    *  by an existing directory to extract into.
    *
    * @param args <code>&lt;filename&gt; &lt;output dir&gt;</code>
    * @throws Exception if the input can't be read, the output directory
    *  is missing, or a file can't be written
    */
   public static void main(String[] args) throws Exception {
      if(args.length < 2) {
         System.err.println("Use:");
         System.err.println("  HMEFContentsExtractor <filename> <output dir>");
         System.err.println("");
         System.err.println("");
         System.err.println("Where <filename> is the winmail.dat file to extract,");
         System.err.println(" and <output dir> is where to place the extracted files");
         System.exit(2);
      }

      HMEFContentsExtractor ext = new HMEFContentsExtractor(new File(args[0]));

      File dir = new File(args[1]);
      File rtf = new File(dir, "message.rtf");
      // isDirectory() is false both for a missing path and for a plain
      //  file, so either case is rejected up front with the same error
      if(! dir.isDirectory()) {
         throw new FileNotFoundException("Output directory " + dir.getName() + " not found");
      }

      System.out.println("Extracting...");
      ext.extractMessageBody(rtf);
      ext.extractAttachments(dir);
      System.out.println("Extraction completed");
   }

   /** The message to extract the body and attachments from */
   private final HMEFMessage message;

   /**
    * Opens the given file and wraps it as a message to extract from.
    *
    * @param filename the HMEF/TNEF/winmail.dat file to open
    * @throws IOException if the file can't be opened or read
    */
   public HMEFContentsExtractor(File filename) throws IOException {
      // NOTE(review): this stream is never closed. It is only safe to close
      //  it here if HMEFMessage reads everything in its constructor -
      //  TODO confirm against HMEFMessage before changing
      this(new HMEFMessage(new FileInputStream(filename)));
   }

   /**
    * Wraps an already opened message.
    *
    * @param message the message to extract from
    */
   public HMEFContentsExtractor(HMEFMessage message) {
      this.message = message;
   }

   /**
    * Extracts the RTF message body to the supplied file.
    * If the message has no {@link MAPIProperty#RTF_COMPRESSED} attribute,
    *  a warning is printed to standard error and no file is created.
    *
    * @param dest the file to write the uncompressed RTF to
    * @throws IOException if the file can't be created or written
    */
   public void extractMessageBody(File dest) throws IOException {
      // Look the body up before opening the output, so that a message
      //  without one doesn't leave an empty file behind
      MAPIRtfAttribute body = (MAPIRtfAttribute)
         message.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
      if(body == null) {
         System.err.println("No message body found, " + dest + " not created");
         return;
      }

      writeFile(dest, body.getData());
   }

   /**
    * Extracts all the message attachments to the supplied directory.
    * Each file is named after the attachment's long filename, falling back
    *  to its short filename, and finally to <code>attachment</code> plus
    *  its position and extension. Any directory part in the name is
    *  dropped, so that files are only ever written directly into
    *  the supplied directory.
    *
    * @param dir the directory to write the attachments into
    * @throws IOException if a file can't be created or written
    */
   public void extractAttachments(File dir) throws IOException {
      int count = 0;
      for(Attachment att : message.getAttachments()) {
         count++;

         // Decide what to call it
         String filename = att.getLongFilename();
         if(filename == null || filename.length() == 0) {
            filename = att.getFilename();
         }
         if(filename == null || filename.length() == 0) {
            filename = "attachment" + count;
            if(att.getExtension() != null) {
               filename += att.getExtension();
            }
         }

         // The name comes from inside the file being extracted, so it
         //  can't be trusted not to point outside of the output directory
         filename = stripPath(filename, "attachment" + count);

         // Save it
         writeFile(new File(dir, filename), att.getContents());
      }
   }

   /**
    * Reduces a filename to its final path component, treating both
    *  <code>/</code> and <code>\</code> as separators on every platform.
    *
    * @param name the untrusted filename
    * @param fallback the name to use if nothing usable is left
    * @return the last path component, or the fallback if that would be
    *  empty, <code>.</code> or <code>..</code>
    */
   private static String stripPath(String name, String fallback) {
      int lastSeparator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
      String base = name.substring(lastSeparator + 1);
      if(base.length() == 0 || base.equals(".") || base.equals("..")) {
         return fallback;
      }
      return base;
   }

   /**
    * Writes the bytes to the file, closing the stream even if the
    *  write fails part way through.
    */
   private static void writeFile(File dest, byte[] contents) throws IOException {
      FileOutputStream fout = new FileOutputStream(dest);
      try {
         fout.write(contents);
      } finally {
         fout.close();
      }
   }
}

View File

@ -48,44 +48,45 @@ public final class TestCompressedRTF extends TestCase {
assertTrue(rtfAttr instanceof MAPIRtfAttribute);
// Check the start of the compressed version
assertEquals(5907, rtfAttr.getData().length);
byte[] data = ((MAPIRtfAttribute)rtfAttr).getRawData();
assertEquals(5907, data.length);
// First 16 bytes is header stuff
// Check it has the length + compressed marker
assertEquals(5907-4, LittleEndian.getShort(rtfAttr.getData()));
assertEquals(5907-4, LittleEndian.getShort(data));
assertEquals(
"LZFu",
StringUtil.getFromCompressedUnicode(rtfAttr.getData(), 8, 4)
StringUtil.getFromCompressedUnicode(data, 8, 4)
);
// Now Look at the code
assertEquals((byte)0x07, rtfAttr.getData()[16+0]); // Flag: cccUUUUU
assertEquals((byte)0x00, rtfAttr.getData()[16+1]); // c1a: offset 0 / 0x000
assertEquals((byte)0x06, rtfAttr.getData()[16+2]); // c1b: length 6+2 -> {\rtf1\a
assertEquals((byte)0x01, rtfAttr.getData()[16+3]); // c2a: offset 16 / 0x010
assertEquals((byte)0x01, rtfAttr.getData()[16+4]); // c2b: length 1+2 -> def
assertEquals((byte)0x0b, rtfAttr.getData()[16+5]); // c3a: offset 182 / 0xb6
assertEquals((byte)0x60, rtfAttr.getData()[16+6]); // c3b: length 0+2 -> la
assertEquals((byte)0x6e, rtfAttr.getData()[16+7]); // n
assertEquals((byte)0x67, rtfAttr.getData()[16+8]); // g
assertEquals((byte)0x31, rtfAttr.getData()[16+9]); // 1
assertEquals((byte)0x30, rtfAttr.getData()[16+10]); // 0
assertEquals((byte)0x32, rtfAttr.getData()[16+11]); // 2
assertEquals((byte)0x07, data[16+0]); // Flag: cccUUUUU
assertEquals((byte)0x00, data[16+1]); // c1a: offset 0 / 0x000
assertEquals((byte)0x06, data[16+2]); // c1b: length 6+2 -> {\rtf1\a
assertEquals((byte)0x01, data[16+3]); // c2a: offset 16 / 0x010
assertEquals((byte)0x01, data[16+4]); // c2b: length 1+2 -> def
assertEquals((byte)0x0b, data[16+5]); // c3a: offset 182 / 0xb6
assertEquals((byte)0x60, data[16+6]); // c3b: length 0+2 -> la
assertEquals((byte)0x6e, data[16+7]); // n
assertEquals((byte)0x67, data[16+8]); // g
assertEquals((byte)0x31, data[16+9]); // 1
assertEquals((byte)0x30, data[16+10]); // 0
assertEquals((byte)0x32, data[16+11]); // 2
assertEquals((byte)0x66, rtfAttr.getData()[16+12]); // Flag: UccUUccU
assertEquals((byte)0x35, rtfAttr.getData()[16+13]); // 5
assertEquals((byte)0x00, rtfAttr.getData()[16+14]); // c2a: offset 6 / 0x006
assertEquals((byte)0x64, rtfAttr.getData()[16+15]); // c2b: length 4+2 -> \ansi\a
assertEquals((byte)0x00, rtfAttr.getData()[16+16]); // c3a: offset 7 / 0x007
assertEquals((byte)0x72, rtfAttr.getData()[16+17]); // c3b: length 2+2 -> nsi
assertEquals((byte)0x63, rtfAttr.getData()[16+18]); // c
assertEquals((byte)0x70, rtfAttr.getData()[16+19]); // p
assertEquals((byte)0x0d, rtfAttr.getData()[16+20]); // c6a: offset 221 / 0x0dd
assertEquals((byte)0xd0, rtfAttr.getData()[16+21]); // c6b: length 0+2 -> g1
assertEquals((byte)0x0e, rtfAttr.getData()[16+22]); // c7a: offset 224 / 0x0e0
assertEquals((byte)0x00, rtfAttr.getData()[16+23]); // c7b: length 0+2 -> 25
assertEquals((byte)0x32, rtfAttr.getData()[16+24]); // 2
assertEquals((byte)0x66, data[16+12]); // Flag: UccUUccU
assertEquals((byte)0x35, data[16+13]); // 5
assertEquals((byte)0x00, data[16+14]); // c2a: offset 6 / 0x006
assertEquals((byte)0x64, data[16+15]); // c2b: length 4+2 -> \ansi\a
assertEquals((byte)0x00, data[16+16]); // c3a: offset 7 / 0x007
assertEquals((byte)0x72, data[16+17]); // c3b: length 2+2 -> nsi
assertEquals((byte)0x63, data[16+18]); // c
assertEquals((byte)0x70, data[16+19]); // p
assertEquals((byte)0x0d, data[16+20]); // c6a: offset 221 / 0x0dd
assertEquals((byte)0xd0, data[16+21]); // c6b: length 0+2 -> g1
assertEquals((byte)0x0e, data[16+22]); // c7a: offset 224 / 0x0e0
assertEquals((byte)0x00, data[16+23]); // c7b: length 0+2 -> 25
assertEquals((byte)0x32, data[16+24]); // 2
}
/**
@ -97,12 +98,13 @@ public final class TestCompressedRTF extends TestCase {
_samples.openResourceAsStream("quick-winmail.dat")
);
MAPIAttribute rtfAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
assertNotNull(rtfAttr);
MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
assertNotNull(attr);
MAPIRtfAttribute rtfAttr = (MAPIRtfAttribute)attr;
// Truncate to header + flag + data for flag
byte[] data = new byte[16+12];
System.arraycopy(rtfAttr.getData(), 0, data, 0, data.length);
System.arraycopy(rtfAttr.getRawData(), 0, data, 0, data.length);
// Decompress it
CompressedRTF comp = new CompressedRTF();
@ -124,12 +126,13 @@ System.err.println(decompStr);
_samples.openResourceAsStream("quick-winmail.dat")
);
MAPIAttribute rtfAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
assertNotNull(rtfAttr);
MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
assertNotNull(attr);
MAPIRtfAttribute rtfAttr = (MAPIRtfAttribute)attr;
// Truncate to header + flag + data for flag + flag + data
byte[] data = new byte[16+12+13];
System.arraycopy(rtfAttr.getData(), 0, data, 0, data.length);
System.arraycopy(rtfAttr.getRawData(), 0, data, 0, data.length);
// Decompress it
CompressedRTF comp = new CompressedRTF();