Add a TNEF extractor class, which can be run from the command line to extract out the message body and attachments
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1076607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aa1963cd3b
commit
efe5a63429
@ -31,17 +31,35 @@ import org.apache.poi.util.StringUtil;
|
||||
* to a {@link HMEFMessage} or one of its {@link Attachment}s.
|
||||
*/
|
||||
public final class MAPIRtfAttribute extends MAPIAttribute {
|
||||
private final byte[] decompressed;
|
||||
private final String data;
|
||||
|
||||
public MAPIRtfAttribute(MAPIProperty property, int type, byte[] data) throws IOException {
|
||||
super(property, type, data);
|
||||
|
||||
CompressedRTF rtf = new CompressedRTF();
|
||||
byte[] decomp = rtf.decompress(new ByteArrayInputStream(data));
|
||||
this.decompressed = rtf.decompress(new ByteArrayInputStream(data));
|
||||
|
||||
this.data = StringUtil.getFromCompressedUnicode(decomp, 0, decomp.length);
|
||||
this.data = StringUtil.getFromCompressedUnicode(decompressed, 0, decompressed.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the original, compressed RTF
|
||||
*/
|
||||
public byte[] getRawData() {
|
||||
return super.getData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the raw uncompressed RTF data
|
||||
*/
|
||||
public byte[] getData() {
|
||||
return decompressed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the uncompressed RTF as a string
|
||||
*/
|
||||
public String getDataString() {
|
||||
return data;
|
||||
}
|
||||
|
@ -0,0 +1,109 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hmef.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.hmef.Attachment;
|
||||
import org.apache.poi.hmef.HMEFMessage;
|
||||
import org.apache.poi.hmef.attribute.MAPIRtfAttribute;
|
||||
import org.apache.poi.hsmf.datatypes.MAPIProperty;
|
||||
|
||||
/**
|
||||
* A utility for extracting out the message body, and all attachments
|
||||
* from a HMEF/TNEF/winmail.dat file
|
||||
*/
|
||||
public final class HMEFContentsExtractor {
|
||||
public static void main(String[] args) throws Exception {
|
||||
if(args.length < 2) {
|
||||
System.err.println("Use:");
|
||||
System.err.println(" HMEFContentsExtractor <filename> <output dir>");
|
||||
System.err.println("");
|
||||
System.err.println("");
|
||||
System.err.println("Where <filename> is the winmail.dat file to extract,");
|
||||
System.err.println(" and <output dir> is where to place the extracted files");
|
||||
System.exit(2);
|
||||
}
|
||||
|
||||
HMEFContentsExtractor ext = new HMEFContentsExtractor(new File(args[0]));
|
||||
|
||||
File dir = new File(args[1]);
|
||||
File rtf = new File(dir, "message.rtf");
|
||||
if(! dir.exists()) {
|
||||
throw new FileNotFoundException("Output directory " + dir.getName() + " not found");
|
||||
}
|
||||
|
||||
System.out.println("Extracting...");
|
||||
ext.extractMessageBody(rtf);
|
||||
ext.extractAttachments(dir);
|
||||
System.out.println("Extraction completed");
|
||||
}
|
||||
|
||||
private HMEFMessage message;
|
||||
public HMEFContentsExtractor(File filename) throws IOException {
|
||||
this(new HMEFMessage(new FileInputStream(filename)));
|
||||
}
|
||||
public HMEFContentsExtractor(HMEFMessage message) {
|
||||
this.message = message;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the RTF message body to the supplied file
|
||||
*/
|
||||
public void extractMessageBody(File dest) throws IOException {
|
||||
FileOutputStream fout = new FileOutputStream(dest);
|
||||
|
||||
MAPIRtfAttribute body = (MAPIRtfAttribute)
|
||||
message.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
|
||||
fout.write(body.getData());
|
||||
|
||||
fout.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts all the message attachments to the supplied directory
|
||||
*/
|
||||
public void extractAttachments(File dir) throws IOException {
|
||||
int count = 0;
|
||||
for(Attachment att : message.getAttachments()) {
|
||||
count++;
|
||||
|
||||
// Decide what to call it
|
||||
String filename = att.getLongFilename();
|
||||
if(filename == null || filename.length() == 0) {
|
||||
filename = att.getFilename();
|
||||
}
|
||||
if(filename == null || filename.length() == 0) {
|
||||
filename = "attachment" + count;
|
||||
if(att.getExtension() != null) {
|
||||
filename += att.getExtension();
|
||||
}
|
||||
}
|
||||
|
||||
// Save it
|
||||
File file = new File(dir, filename);
|
||||
FileOutputStream fout = new FileOutputStream(file);
|
||||
fout.write( att.getContents() );
|
||||
fout.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -48,44 +48,45 @@ public final class TestCompressedRTF extends TestCase {
|
||||
assertTrue(rtfAttr instanceof MAPIRtfAttribute);
|
||||
|
||||
// Check the start of the compressed version
|
||||
assertEquals(5907, rtfAttr.getData().length);
|
||||
byte[] data = ((MAPIRtfAttribute)rtfAttr).getRawData();
|
||||
assertEquals(5907, data.length);
|
||||
|
||||
// First 16 bytes is header stuff
|
||||
// Check it has the length + compressed marker
|
||||
assertEquals(5907-4, LittleEndian.getShort(rtfAttr.getData()));
|
||||
assertEquals(5907-4, LittleEndian.getShort(data));
|
||||
assertEquals(
|
||||
"LZFu",
|
||||
StringUtil.getFromCompressedUnicode(rtfAttr.getData(), 8, 4)
|
||||
StringUtil.getFromCompressedUnicode(data, 8, 4)
|
||||
);
|
||||
|
||||
|
||||
// Now Look at the code
|
||||
assertEquals((byte)0x07, rtfAttr.getData()[16+0]); // Flag: cccUUUUU
|
||||
assertEquals((byte)0x00, rtfAttr.getData()[16+1]); // c1a: offset 0 / 0x000
|
||||
assertEquals((byte)0x06, rtfAttr.getData()[16+2]); // c1b: length 6+2 -> {\rtf1\a
|
||||
assertEquals((byte)0x01, rtfAttr.getData()[16+3]); // c2a: offset 16 / 0x010
|
||||
assertEquals((byte)0x01, rtfAttr.getData()[16+4]); // c2b: length 1+2 -> def
|
||||
assertEquals((byte)0x0b, rtfAttr.getData()[16+5]); // c3a: offset 182 / 0xb6
|
||||
assertEquals((byte)0x60, rtfAttr.getData()[16+6]); // c3b: length 0+2 -> la
|
||||
assertEquals((byte)0x6e, rtfAttr.getData()[16+7]); // n
|
||||
assertEquals((byte)0x67, rtfAttr.getData()[16+8]); // g
|
||||
assertEquals((byte)0x31, rtfAttr.getData()[16+9]); // 1
|
||||
assertEquals((byte)0x30, rtfAttr.getData()[16+10]); // 0
|
||||
assertEquals((byte)0x32, rtfAttr.getData()[16+11]); // 2
|
||||
assertEquals((byte)0x07, data[16+0]); // Flag: cccUUUUU
|
||||
assertEquals((byte)0x00, data[16+1]); // c1a: offset 0 / 0x000
|
||||
assertEquals((byte)0x06, data[16+2]); // c1b: length 6+2 -> {\rtf1\a
|
||||
assertEquals((byte)0x01, data[16+3]); // c2a: offset 16 / 0x010
|
||||
assertEquals((byte)0x01, data[16+4]); // c2b: length 1+2 -> def
|
||||
assertEquals((byte)0x0b, data[16+5]); // c3a: offset 182 / 0xb6
|
||||
assertEquals((byte)0x60, data[16+6]); // c3b: length 0+2 -> la
|
||||
assertEquals((byte)0x6e, data[16+7]); // n
|
||||
assertEquals((byte)0x67, data[16+8]); // g
|
||||
assertEquals((byte)0x31, data[16+9]); // 1
|
||||
assertEquals((byte)0x30, data[16+10]); // 0
|
||||
assertEquals((byte)0x32, data[16+11]); // 2
|
||||
|
||||
assertEquals((byte)0x66, rtfAttr.getData()[16+12]); // Flag: UccUUccU
|
||||
assertEquals((byte)0x35, rtfAttr.getData()[16+13]); // 5
|
||||
assertEquals((byte)0x00, rtfAttr.getData()[16+14]); // c2a: offset 6 / 0x006
|
||||
assertEquals((byte)0x64, rtfAttr.getData()[16+15]); // c2b: length 4+2 -> \ansi\a
|
||||
assertEquals((byte)0x00, rtfAttr.getData()[16+16]); // c3a: offset 7 / 0x007
|
||||
assertEquals((byte)0x72, rtfAttr.getData()[16+17]); // c3b: length 2+2 -> nsi
|
||||
assertEquals((byte)0x63, rtfAttr.getData()[16+18]); // c
|
||||
assertEquals((byte)0x70, rtfAttr.getData()[16+19]); // p
|
||||
assertEquals((byte)0x0d, rtfAttr.getData()[16+20]); // c6a: offset 221 / 0x0dd
|
||||
assertEquals((byte)0xd0, rtfAttr.getData()[16+21]); // c6b: length 0+2 -> g1
|
||||
assertEquals((byte)0x0e, rtfAttr.getData()[16+22]); // c7a: offset 224 / 0x0e0
|
||||
assertEquals((byte)0x00, rtfAttr.getData()[16+23]); // c7b: length 0+2 -> 25
|
||||
assertEquals((byte)0x32, rtfAttr.getData()[16+24]); // 2
|
||||
assertEquals((byte)0x66, data[16+12]); // Flag: UccUUccU
|
||||
assertEquals((byte)0x35, data[16+13]); // 5
|
||||
assertEquals((byte)0x00, data[16+14]); // c2a: offset 6 / 0x006
|
||||
assertEquals((byte)0x64, data[16+15]); // c2b: length 4+2 -> \ansi\a
|
||||
assertEquals((byte)0x00, data[16+16]); // c3a: offset 7 / 0x007
|
||||
assertEquals((byte)0x72, data[16+17]); // c3b: length 2+2 -> nsi
|
||||
assertEquals((byte)0x63, data[16+18]); // c
|
||||
assertEquals((byte)0x70, data[16+19]); // p
|
||||
assertEquals((byte)0x0d, data[16+20]); // c6a: offset 221 / 0x0dd
|
||||
assertEquals((byte)0xd0, data[16+21]); // c6b: length 0+2 -> g1
|
||||
assertEquals((byte)0x0e, data[16+22]); // c7a: offset 224 / 0x0e0
|
||||
assertEquals((byte)0x00, data[16+23]); // c7b: length 0+2 -> 25
|
||||
assertEquals((byte)0x32, data[16+24]); // 2
|
||||
}
|
||||
|
||||
/**
|
||||
@ -97,12 +98,13 @@ public final class TestCompressedRTF extends TestCase {
|
||||
_samples.openResourceAsStream("quick-winmail.dat")
|
||||
);
|
||||
|
||||
MAPIAttribute rtfAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
|
||||
assertNotNull(rtfAttr);
|
||||
MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
|
||||
assertNotNull(attr);
|
||||
MAPIRtfAttribute rtfAttr = (MAPIRtfAttribute)attr;
|
||||
|
||||
// Truncate to header + flag + data for flag
|
||||
byte[] data = new byte[16+12];
|
||||
System.arraycopy(rtfAttr.getData(), 0, data, 0, data.length);
|
||||
System.arraycopy(rtfAttr.getRawData(), 0, data, 0, data.length);
|
||||
|
||||
// Decompress it
|
||||
CompressedRTF comp = new CompressedRTF();
|
||||
@ -124,12 +126,13 @@ System.err.println(decompStr);
|
||||
_samples.openResourceAsStream("quick-winmail.dat")
|
||||
);
|
||||
|
||||
MAPIAttribute rtfAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
|
||||
assertNotNull(rtfAttr);
|
||||
MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
|
||||
assertNotNull(attr);
|
||||
MAPIRtfAttribute rtfAttr = (MAPIRtfAttribute)attr;
|
||||
|
||||
// Truncate to header + flag + data for flag + flag + data
|
||||
byte[] data = new byte[16+12+13];
|
||||
System.arraycopy(rtfAttr.getData(), 0, data, 0, data.length);
|
||||
System.arraycopy(rtfAttr.getRawData(), 0, data, 0, data.length);
|
||||
|
||||
// Decompress it
|
||||
CompressedRTF comp = new CompressedRTF();
|
||||
|
Loading…
Reference in New Issue
Block a user