Start on major HSMF refactoring. Should compile, but not quite all tests pass as a little bit of work is left

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@896914 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-01-07 16:15:20 +00:00
parent e5884f2f66
commit 2bb376f55b
16 changed files with 666 additions and 471 deletions

View File

@ -37,7 +37,7 @@ import org.apache.poi.poifs.property.Property;
public class DirectoryNode
extends EntryNode
implements DirectoryEntry, POIFSViewable
implements DirectoryEntry, POIFSViewable, Iterable<Entry>
{
// Map of Entry instances, keyed by their names
@ -452,6 +452,13 @@ public class DirectoryNode
return getName();
}
/**
 * Supplies the iterator needed to satisfy {@link Iterable}, by handing
 *  back whatever {@link #getEntries()} returns.
 *
 * @return an Iterator over all the entries
 */
public Iterator<Entry> iterator() {
    final Iterator<Entry> entries = getEntries();
    return entries;
}
/* ********** END begin implementation of POIFSViewable ********** */
} // end public class DirectoryNode

View File

@ -21,10 +21,15 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Map;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.hsmf.parsers.POIFSChunkParser;
@ -36,9 +41,12 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* @author Travis Ferguson
*/
public class MAPIMessage {
private POIFSChunkParser chunkParser;
private POIFSFileSystem fs;
private Chunks chunks;
private Chunks mainChunks;
private NameIdChunks nameIdChunks;
private RecipientChunks recipientChunks;
private AttachmentChunks[] attachmentChunks;
/**
* Constructor for creating new files.
@ -64,35 +72,59 @@ public class MAPIMessage {
* @throws IOException
*/
public MAPIMessage(InputStream in) throws IOException {
this.fs = new POIFSFileSystem(in);
chunkParser = new POIFSChunkParser(this.fs);
// Figure out the right string type, based on
// the chunks present
chunks = chunkParser.identifyChunks();
this(new POIFSFileSystem(in));
}
/**
 * Constructor for reading MSG Files from an already opened POIFS
 *  file system.
 * The file system is run through {@link POIFSChunkParser#parse(POIFSFileSystem)}
 *  and the resulting groups are sorted into the main, name id, recipient
 *  and attachment fields of this message. A field whose group type is
 *  not returned by the parser is left as null.
 *
 * @param fs the file system holding the message
 * @throws IOException if the parser reports one while reading the chunks
 */
public MAPIMessage(POIFSFileSystem fs) throws IOException {
    this.fs = fs;

    // Grab all the chunks
    ChunkGroup[] chunkGroups = POIFSChunkParser.parse(this.fs);

    // Grab interesting bits
    ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
    for(ChunkGroup group : chunkGroups) {
        // Should only ever be one of these
        // (if there were several, the last one seen would win)
        if(group instanceof Chunks) {
            mainChunks = (Chunks)group;
        } else if(group instanceof NameIdChunks) {
            nameIdChunks = (NameIdChunks)group;
        } else if(group instanceof RecipientChunks) {
            recipientChunks = (RecipientChunks)group;
        }

        // Add to list(s) - there can be any number of attachments
        if(group instanceof AttachmentChunks) {
            attachments.add((AttachmentChunks)group);
        }
    }
    attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]);
}
/**
* Gets a string value based on the passed chunk.
* @param chunk
* @throws ChunkNotFoundException
* @throws ChunkNotFoundException if the chunk isn't there
*/
public String getStringFromChunk(StringChunk chunk) throws ChunkNotFoundException {
Chunk out = this.chunkParser.getDocumentNode(chunk);
StringChunk strchunk = (StringChunk)out;
return strchunk.toString();
if(chunk == null) {
throw new ChunkNotFoundException();
}
return chunk.getValue();
}
/**
* Gets the plain text body of this Outlook Message
* @return The string representation of the 'text' version of the body, if available.
* @throws IOException
* @throws ChunkNotFoundException
*/
public String getTextBody() throws ChunkNotFoundException {
return getStringFromChunk(chunks.textBodyChunk);
return getStringFromChunk(mainChunks.textBodyChunk);
}
/**
@ -100,17 +132,16 @@ public class MAPIMessage {
* @throws ChunkNotFoundException
*/
public String getSubject() throws ChunkNotFoundException {
return getStringFromChunk(chunks.subjectChunk);
return getStringFromChunk(mainChunks.subjectChunk);
}
/**
* Gets the display value of the "TO" line of the outlook message
* This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
* @throws ChunkNotFoundException
*/
public String getDisplayTo() throws ChunkNotFoundException {
return getStringFromChunk(chunks.displayToChunk);
return getStringFromChunk(mainChunks.displayToChunk);
}
/**
@ -119,7 +150,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException
*/
public String getDisplayFrom() throws ChunkNotFoundException {
return getStringFromChunk(chunks.displayFromChunk);
return getStringFromChunk(mainChunks.displayFromChunk);
}
/**
@ -128,7 +159,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException
*/
public String getDisplayCC() throws ChunkNotFoundException {
return getStringFromChunk(chunks.displayCCChunk);
return getStringFromChunk(mainChunks.displayCCChunk);
}
/**
@ -137,7 +168,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException
*/
public String getDisplayBCC() throws ChunkNotFoundException {
return getStringFromChunk(chunks.displayBCCChunk);
return getStringFromChunk(mainChunks.displayBCCChunk);
}
@ -147,7 +178,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException
*/
public String getConversationTopic() throws ChunkNotFoundException {
return getStringFromChunk(chunks.conversationTopic);
return getStringFromChunk(mainChunks.conversationTopic);
}
/**
@ -158,15 +189,13 @@ public class MAPIMessage {
* @throws ChunkNotFoundException
*/
public String getMessageClass() throws ChunkNotFoundException {
return getStringFromChunk(chunks.messageClass);
return getStringFromChunk(mainChunks.messageClass);
}
/**
* Gets the message attachments.
*
* @return a map containing attachment name (String) and data (ByteArrayInputStream)
*/
public Map getAttachmentFiles() {
return this.chunkParser.getAttachmentList();
public AttachmentChunks[] getAttachmentFiles() {
return attachmentChunks;
}
}

View File

@ -16,30 +16,61 @@
==================================================================== */
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.List;
/**
* Collection of convenience chunks for standard parts of the MSG file attachment.
*/
public class AttachmentChunks {
public class AttachmentChunks implements ChunkGroup {
public static final String PREFIX = "__attach_version1.0_#";
/* String parts of Outlook Messages Attachments that are currently known */
public static final int ATTACH_DATA = 0x3701;
public static final int ATTACH_EXTENSION = 0x3703;
public static final int ATTACH_FILENAME = 0x3704;
public static final int ATTACH_LONG_FILENAME = 0x3707;
public static final int ATTACH_MIME_TAG = 0x370E;
public static final String namePrefix = "__attach_version1.0_#";
/* String parts of Outlook Messages Attachments that are currently known */
public ByteChunk attachData;
public StringChunk attachExtension;
public StringChunk attachFileName;
public StringChunk attachLongFileName;
public StringChunk attachMimeTag;
public ByteChunk attachData;
public StringChunk attachExtension;
public StringChunk attachFileName;
public StringChunk attachLongFileName;
public StringChunk attachMimeTag;
private AttachmentChunks(boolean newStringType) {
attachData = new ByteChunk(0x3701, 0x0102);
attachExtension = new StringChunk(0x3703, newStringType);
attachFileName = new StringChunk(0x3704, newStringType);
attachLongFileName = new StringChunk(0x3707, newStringType);
attachMimeTag = new StringChunk(0x370E, newStringType);
}
public static AttachmentChunks getInstance(boolean newStringType) {
return new AttachmentChunks(newStringType);
}
/** Holds all the chunks that were found. */
private List<Chunk> allChunks = new ArrayList<Chunk>();
/**
 * Copies every chunk recorded so far into a freshly
 *  allocated array, in the order they were recorded.
 */
public Chunk[] getAll() {
    Chunk[] found = new Chunk[allChunks.size()];
    return allChunks.toArray(found);
}
/**
* Returns the chunks of this group, by delegating to {@link #getAll()}.
*/
public Chunk[] getChunks() {
return getAll();
}
/**
 * Callback for the parser, invoked once per chunk found.
 * Chunks with one of the known attachment ids are also stored
 *  in the matching public field; every chunk, known or not,
 *  ends up in the list behind {@link #getAll()}.
 */
public void record(Chunk chunk) {
    final int id = chunk.getChunkId();

    if(id == ATTACH_DATA) {
        attachData = (ByteChunk)chunk;
    } else if(id == ATTACH_EXTENSION) {
        attachExtension = (StringChunk)chunk;
    } else if(id == ATTACH_FILENAME) {
        attachFileName = (StringChunk)chunk;
    } else if(id == ATTACH_LONG_FILENAME) {
        attachLongFileName = (StringChunk)chunk;
    } else if(id == ATTACH_MIME_TAG) {
        attachMimeTag = (StringChunk)chunk;
    }

    // Whatever it was, remember it in the full list too
    allChunks.add(chunk);
}
}

View File

@ -16,27 +16,24 @@
==================================================================== */
package org.apache.poi.hsmf.datatypes;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.poi.util.IOUtils;
/**
* A Chunk made up of a byte array.
*/
public class ByteChunk extends Chunk {
private ByteArrayOutputStream value;
private byte[] value;
/**
* Creates a Byte Chunk, for either the old
* or new style of string chunk types.
* Creates a Byte Chunk.
*/
public ByteChunk(int chunkId, boolean newStyleString) {
this(chunkId, getStringType(newStyleString));
}
private static int getStringType(boolean newStyleString) {
if(newStyleString)
return Types.NEW_STRING;
return Types.OLD_STRING;
public ByteChunk(String entryName) {
super(entryName);
}
/**
@ -44,17 +41,21 @@ public class ByteChunk extends Chunk {
* type.
*/
public ByteChunk(int chunkId, int type) {
this.chunkId = chunkId;
this.type = type;
}
public ByteArrayOutputStream getValueByteArray() {
return this.value;
super(chunkId, type);
}
public void setValue(ByteArrayOutputStream value) {
this.value = value;
}
/**
* Replaces the stored value with the byte array that
*  IOUtils.toByteArray produces for the given stream.
* NOTE(review): presumably that is everything left in the stream - confirm against IOUtils.
*/
public void readValue(InputStream value) throws IOException {
this.value = IOUtils.toByteArray(value);
}
/**
* Writes the stored byte array to the given stream.
* NOTE(review): the value is not null-checked here - confirm it is always
*  populated (readValue / setValue) before this gets called.
*/
public void writeValue(OutputStream out) throws IOException {
out.write(value);
}
/**
* Returns the stored byte array itself (no copy is made).
*/
public byte[] getValue() {
return value;
}
/**
* Stores the given byte array as the value of this chunk (no copy is made).
*/
public void setValue(byte[] value) {
this.value = value;
}
}

View File

@ -17,12 +17,33 @@
package org.apache.poi.hsmf.datatypes;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
abstract public class Chunk {
public static final String DEFAULT_NAME_PREFIX = "__substg1.0_";
protected int chunkId;
protected int type;
protected String namePrefix = "__substg1.0_";
protected String namePrefix;
/**
 * Creates a Chunk from the name of its entry, which must be made up
 *  of a prefix ending in an underscore, followed by four hex digits
 *  of chunk id and four hex digits of type, eg
 *  <code>__substg1.0_0037001E</code>. Anything after those eight
 *  digits is ignored.
 *
 * @param entryName the full entry name
 * @throws IllegalArgumentException if there is no underscore, if fewer
 *  than eight characters follow the last underscore, or (as a
 *  NumberFormatException) if those characters are not hex digits
 */
protected Chunk(String entryName) {
    int splitAt = entryName.lastIndexOf('_');
    // Need the underscore plus at least 8 characters after it, so the
    //  underscore can be no later than 9 from the end. (Checking against
    //  length-8 let 7 character ids through, which then blew up
    //  in substring rather than being reported as an invalid name)
    if(splitAt == -1 || splitAt > (entryName.length()-9)) {
        throw new IllegalArgumentException("Invalid chunk name " + entryName);
    }

    namePrefix = entryName.substring(0, splitAt+1);
    String ids = entryName.substring(splitAt+1);
    chunkId = Integer.parseInt(ids.substring(0, 4), 16);
    type    = Integer.parseInt(ids.substring(4, 8), 16);
}
/**
 * Creates a Chunk with the given id and type, named
 *  with the {@link #DEFAULT_NAME_PREFIX}.
 */
protected Chunk(int chunkId, int type) {
    this.chunkId = chunkId;
    this.type = type;
    this.namePrefix = DEFAULT_NAME_PREFIX;
}
/**
* Gets the id of this chunk
@ -52,13 +73,12 @@ abstract public class Chunk {
}
/**
* Gets a reference to a ByteArrayOutputStream that contains the value of this chunk.
* Writes the value of this chunk back out again.
*/
public abstract ByteArrayOutputStream getValueByteArray();
public abstract void writeValue(OutputStream out) throws IOException;
/**
* Sets the value of this chunk using a OutputStream
* @param value
* Reads the value of this chunk using an InputStream
*/
public abstract void setValue(ByteArrayOutputStream value);
public abstract void readValue(InputStream value) throws IOException;
}

View File

@ -0,0 +1,36 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf.datatypes;
/**
 * A set of chunks which all sit at the same
 *  point in the file structure.
 */
public interface ChunkGroup {
    /**
     * Gives back the chunks belonging to this group.
     * All the interesting Chunks should certainly be included,
     *  but implementations needn't always include every Chunk.
     */
    Chunk[] getChunks();

    /**
     * Callback used by the parser, once for each chunk it finds.
     */
    void record(Chunk chunk);
}

View File

@ -17,60 +17,104 @@
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.List;
/**
* Collection of convenence chunks for standard parts of the MSG file.
*
* @author Travis Ferguson
* Collection of convenience chunks for standard parts of the MSG file.
*
* Not all of these will be present in any given file
*/
public final class Chunks {
/* String parts of Outlook Messages that are currently known */
public final class Chunks implements ChunkGroup {
/* String parts of Outlook Messages that are currently known */
public static final int MESSAGE_CLASS = 0x001A;
public static final int SUBJECT = 0x0037;
public static final int DATE = 0x0047;
public static final int CONVERSATION_TOPIC = 0x0070;
public static final int SENT_BY_SERVER_TYPE = 0x0075;
// RECEIVEDEMAIL = 76
public static final int DISPLAY_TO = 0x0E04;
public static final int DISPLAY_FROM = 0x0C1A;
public static final int EMAIL_FROM = 0x0C1F;
public static final int DISPLAY_CC = 0x0E03;
public static final int DISPLAY_BCC = 0x0E02;
public static final int TEXT_BODY = 0x1000;
/** Holds all the chunks that were found. */
private List<Chunk> allChunks = new ArrayList<Chunk>();
/** Type of message that the MSG represents (ie. IPM.Note) */
public StringChunk messageClass;
/** BODY Chunk, for plain/text messages */
public StringChunk textBodyChunk;
/** Subject link chunk, in plain/text */
public StringChunk subjectChunk;
/** Value that is in the TO field (not actually the addresses as they are stored in recip directory nodes */
public StringChunk displayToChunk;
/** Value that is in the FROM field */
public StringChunk displayFromChunk;
/** value that shows in the CC field */
public StringChunk displayCCChunk;
/** Value that shows in the BCC field */
public StringChunk displayBCCChunk;
/** Sort of like the subject line, but without the RE: and FWD: parts. */
public StringChunk conversationTopic;
/** Type of server that the message originated from (SMTP, etc). */
public StringChunk sentByServerType;
/** TODO */
public StringChunk dateChunk;
/** TODO */
public StringChunk emailFromChunk;
/** Type of message that the MSG represents (ie. IPM.Note) */
public StringChunk messageClass;
/** BODY Chunk, for plain/text messages */
public StringChunk textBodyChunk;
/** Subject link chunk, in plain/text */
public StringChunk subjectChunk;
/** Value that is in the TO field (not actually the addresses as they are stored in recip directory nodes */
public StringChunk displayToChunk;
/** Value that is in the FROM field */
public StringChunk displayFromChunk;
/** value that shows in the CC field */
public StringChunk displayCCChunk;
/** Value that shows in the BCC field */
public StringChunk displayBCCChunk;
/** Sort of like the subject line, but without the RE: and FWD: parts. */
public StringChunk conversationTopic;
/** Type of server that the message originated from (SMTP, etc). */
public StringChunk sentByServerType;
/** TODO */
public StringChunk dateChunk;
/** TODO */
public StringChunk emailFromChunk;
/** TODO */
public StringChunk recipientSearchChunk;
/** TODO */
public StringChunk recipientEmailChunk;
/**
 * Copies every chunk recorded so far into a freshly
 *  allocated array, in the order they were recorded.
 */
public Chunk[] getAll() {
    Chunk[] found = new Chunk[allChunks.size()];
    return allChunks.toArray(found);
}
/**
* Returns the chunks of this group, by delegating to {@link #getAll()}.
*/
public Chunk[] getChunks() {
return getAll();
}
/**
* Called by the parser whenever a chunk is found.
*/
public void record(Chunk chunk) {
switch(chunk.getChunkId()) {
case MESSAGE_CLASS:
messageClass = (StringChunk)chunk;
break;
case SUBJECT:
subjectChunk = (StringChunk)chunk;
break;
case DATE:
dateChunk = (StringChunk)chunk;
break;
case CONVERSATION_TOPIC:
conversationTopic = (StringChunk)chunk;
break;
case SENT_BY_SERVER_TYPE:
sentByServerType = (StringChunk)chunk;
break;
case DISPLAY_TO:
displayToChunk = (StringChunk)chunk;
break;
case DISPLAY_FROM:
displayFromChunk = (StringChunk)chunk;
break;
case EMAIL_FROM:
emailFromChunk = (StringChunk)chunk;
break;
case DISPLAY_CC:
displayCCChunk = (StringChunk)chunk;
break;
case DISPLAY_BCC:
displayBCCChunk = (StringChunk)chunk;
break;
case TEXT_BODY:
textBodyChunk = (StringChunk)chunk;
break;
}
private Chunks(boolean newStringType) {
messageClass = new StringChunk(0x001A, newStringType);
subjectChunk = new StringChunk(0x0037, newStringType);
dateChunk = new StringChunk(0x0047, newStringType);
conversationTopic = new StringChunk(0x0070, newStringType);
sentByServerType = new StringChunk(0x0075, newStringType);
// RECEIVEDEMAIL = 76
displayToChunk = new StringChunk(0x0E04, newStringType);
displayFromChunk = new StringChunk(0x0C1A, newStringType);
emailFromChunk = new StringChunk(0x0C1F, newStringType);
displayCCChunk = new StringChunk(0x0E03, newStringType);
displayBCCChunk = new StringChunk(0x0E02, newStringType);
recipientSearchChunk = new StringChunk(0x300B, newStringType);
recipientEmailChunk = new StringChunk(0x39FE, newStringType);
textBodyChunk = new StringChunk(0x1000, newStringType);
}
public static Chunks getInstance(boolean newStringType) {
return new Chunks(newStringType);
}
// And add to the main list
allChunks.add(chunk);
}
}

View File

@ -0,0 +1,47 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.List;
/**
 * Group holding the chunks found in the NameID
 *  part of an outlook file. None of them are singled
 *  out; they are simply collected in the order found.
 */
public final class NameIdChunks implements ChunkGroup {
    public static final String PREFIX = "__nameid_version1.0";

    /** Every chunk handed to {@link #record(Chunk)}, in order. */
    private List<Chunk> allChunks = new ArrayList<Chunk>();

    /**
     * Copies the recorded chunks into a freshly allocated array.
     */
    public Chunk[] getAll() {
        Chunk[] found = new Chunk[allChunks.size()];
        return allChunks.toArray(found);
    }

    /**
     * Same as {@link #getAll()}.
     */
    public Chunk[] getChunks() {
        return getAll();
    }

    /**
     * Callback for the parser, invoked once per chunk found.
     */
    public void record(Chunk chunk) {
        allChunks.add(chunk);
    }
}

View File

@ -0,0 +1,66 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.List;
/**
 * Group holding the chunks found in the
 *  Recip(ient) part of an outlook file.
 */
public final class RecipientChunks implements ChunkGroup {
    public static final String PREFIX = "__recip_version1.0_#";

    public static final int RECIPIENT_SEARCH = 0x300B;
    public static final int RECIPIENT_EMAIL  = 0x39FE;

    /** The chunk recorded with id {@link #RECIPIENT_SEARCH}. TODO describe */
    public StringChunk recipientSearchChunk;
    /** The chunk recorded with id {@link #RECIPIENT_EMAIL}. TODO describe */
    public StringChunk recipientEmailChunk;

    /** Every chunk handed to {@link #record(Chunk)}, in order. */
    private List<Chunk> allChunks = new ArrayList<Chunk>();

    /**
     * Copies the recorded chunks into a freshly allocated array.
     */
    public Chunk[] getAll() {
        Chunk[] found = new Chunk[allChunks.size()];
        return allChunks.toArray(found);
    }

    /**
     * Same as {@link #getAll()}.
     */
    public Chunk[] getChunks() {
        return getAll();
    }

    /**
     * Callback for the parser, invoked once per chunk found.
     * The two known recipient chunks are also stored in their
     *  public fields; every chunk goes into the full list.
     */
    public void record(Chunk chunk) {
        final int id = chunk.getChunkId();

        if(id == RECIPIENT_SEARCH) {
            recipientSearchChunk = (StringChunk)chunk;
        } else if(id == RECIPIENT_EMAIL) {
            recipientEmailChunk = (StringChunk)chunk;
        }

        // Whatever it was, remember it in the full list too
        allChunks.add(chunk);
    }
}

View File

@ -17,30 +17,27 @@
package org.apache.poi.hsmf.datatypes;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.StringUtil;
/**
* A Chunk made up of a single string.
* @author Travis Ferguson
*/
public class StringChunk extends Chunk {
private String value;
/**
* Creates a String Chunk, for either the old
* or new style of string chunk types.
* Creates a String Chunk.
*/
public StringChunk(int chunkId, boolean newStyleString) {
this(chunkId, getStringType(newStyleString));
}
private static int getStringType(boolean newStyleString) {
if(newStyleString)
return Types.NEW_STRING;
return Types.OLD_STRING;
public StringChunk(String entryName) {
super(entryName);
}
/**
@ -48,39 +45,57 @@ public class StringChunk extends Chunk {
* type.
*/
public StringChunk(int chunkId, int type) {
this.chunkId = chunkId;
this.type = type;
super(chunkId, type);
}
/* (non-Javadoc)
* @see org.apache.poi.hsmf.Chunk.Chunk#getValueByteArray()
*/
public ByteArrayOutputStream getValueByteArray() {
// TODO Auto-generated method stub
return null;
}
/* (non-Javadoc)
* @see org.apache.poi.hsmf.Chunk.Chunk#setValue(java.io.ByteArrayOutputStream)
*/
public void setValue(ByteArrayOutputStream value) {
String tmpValue;
if (type == Types.NEW_STRING) {
try {
tmpValue = new String(value.toByteArray(), "UTF-16LE");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("Core encoding not found, JVM broken?", e);
}
} else {
try {
tmpValue = new String(value.toByteArray(), "CP1252");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("Core encoding not found, JVM broken?", e);
}
}
/**
 * Reads the raw bytes of this chunk from the stream, and decodes
 *  them into the String value according to the chunk's type:
 *  CP1252 for {@link Types#ASCII_STRING} (the 8-bit string type),
 *  little endian unicode for {@link Types#UNICODE_STRING}.
 * Any NUL characters are stripped from the decoded text.
 *
 * @throws IOException if the stream can't be read
 * @throws IllegalArgumentException if the type of this chunk is
 *  not one of the two string types
 */
public void readValue(InputStream value) throws IOException {
    String tmpValue;
    byte[] data = IOUtils.toByteArray(value);

    switch(type) {
    case Types.ASCII_STRING:
        try {
            // 8-bit strings are single byte per character, so must
            //  not be decoded as UTF-16
            tmpValue = new String(data, "CP1252");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("Core encoding not found, JVM broken?", e);
        }
        break;
    case Types.UNICODE_STRING:
        tmpValue = StringUtil.getFromUnicodeLE(data);
        break;
    default:
        throw new IllegalArgumentException("Invalid type " + type + " for String Chunk");
    }

    // Clean up
    this.value = tmpValue.replace("\0", "");
}

/**
 * Encodes the String value back into bytes, using the same
 *  encoding per type as {@link #readValue(InputStream)}, and
 *  writes them to the stream.
 * NOTE(review): the value is not null-checked here - confirm it is
 *  always populated before this gets called.
 *
 * @throws IOException if the stream can't be written to
 * @throws IllegalArgumentException if the type of this chunk is
 *  not one of the two string types
 */
public void writeValue(OutputStream out) throws IOException {
    byte[] data;

    switch(type) {
    case Types.ASCII_STRING:
        try {
            // Must mirror the decoding done in readValue
            data = value.getBytes("CP1252");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("Core encoding not found, JVM broken?", e);
        }
        break;
    case Types.UNICODE_STRING:
        // Two bytes per character
        data = new byte[value.length()*2];
        StringUtil.putUnicodeLE(value, data, 0);
        break;
    default:
        throw new IllegalArgumentException("Invalid type " + type + " for String Chunk");
    }

    out.write(data);
}
/**
* Returns the String value currently held by this chunk.
*/
public String getValue() {
return this.value;
}
/**
* Returns the same String as {@link #getValue()}.
*/
public String toString() {
return this.value;
}

View File

@ -18,20 +18,20 @@
package org.apache.poi.hsmf.datatypes;
public final class Types {
public static int BINARY = 0x0102;
public static final int BINARY = 0x0102;
/**
* An 8-bit string, probably in US-ASCII, but don't quote us...
* An 8-bit string, probably in CP1252, but don't quote us...
* Normally used for everything before Outlook 3.0, and some
* fields in Outlook 3.0
*/
public static int ASCII_STRING = 0x001E;
public static final int ASCII_STRING = 0x001E;
/** A string, from Outlook 3.0 onwards. Normally unicode */
public static int UNICODE_STRING = 0x001F;
public static final int UNICODE_STRING = 0x001F;
public static int LONG = 0x0003;
public static int TIME = 0x0040;
public static int BOOLEAN = 0x000B;
public static final int LONG = 0x0003;
public static final int TIME = 0x0040;
public static final int BOOLEAN = 0x000B;
public static String asFileEnding(int type) {
String str = Integer.toHexString(type).toUpperCase();

View File

@ -20,6 +20,9 @@ package org.apache.poi.hsmf.exceptions;
public final class ChunkNotFoundException extends Exception {
private static final long serialVersionUID = 1L;
/**
* Creates the exception with the fixed message "Chunk not found",
*  for when there is no chunk name to report.
*/
public ChunkNotFoundException() {
super("Chunk not found");
}
/**
* Creates the exception with a message naming the chunk that was missing.
* @param chunkName the name to put at the start of the message
*/
public ChunkNotFoundException(String chunkName) {
super(chunkName + " was named, but not found in POIFS object");
}

View File

@ -17,283 +17,127 @@
package org.apache.poi.hsmf.parsers;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.ByteChunk;
import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.hsmf.exceptions.DirectoryChunkNotFoundException;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSDocument;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.poifs.property.DirectoryProperty;
import org.apache.poi.poifs.property.DocumentProperty;
import org.apache.poi.poifs.storage.BlockWritable;
/**
* Provides a HashMap with the ability to parse a PIOFS object and provide
* an 'easy to access' hashmap structure for the document chunks inside it.
*
* @author Travis Ferguson
* Processes a POIFS of a .msg file into groups of Chunks, such as
* core data, attachment #1 data, attachment #2 data, recipient
* data and so on.
*/
public final class POIFSChunkParser {
/**
* Parses a whole file system, by passing its root
*  directory to {@link #parse(DirectoryNode)}.
* @param fs the file system to take the root directory from
* @return the groups of chunks that parsing the root directory gave
*/
public static ChunkGroup[] parse(POIFSFileSystem fs) throws IOException {
return parse(fs.getRoot());
}
public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
Chunks mainChunks = new Chunks();
ArrayList<ChunkGroup> groups = new ArrayList<ChunkGroup>();
groups.add(mainChunks);
public POIFSChunkParser(POIFSFileSystem fs) throws IOException {
this.setFileSystem(fs);
}
/**
* Set the POIFileSystem object that this object is using.
* @param fs
*/
public void setFileSystem(POIFSFileSystem fs) throws IOException {
this.fs = fs;
this.reparseFileSystem();
}
/**
* Get a reference to the FileSystem object that this object is currently using.
*/
public POIFSFileSystem getFileSystem() {
return this.fs;
}
/**
* Reparse the FileSystem object, resetting all the chunks stored in this object
*
*/
public void reparseFileSystem() throws IOException {
// first clear this object of all chunks
DirectoryEntry root = this.fs.getRoot();
Iterator<Entry> iter = root.getEntries();
this.directoryMap = this.processPOIIterator(iter);
}
/**
* Returns a list of the standard chunk types, as
* appropriate for the chunks we find in the file.
*/
public Chunks identifyChunks() {
return Chunks.getInstance(this.isNewChunkVersion(this.directoryMap));
}
/**
* Returns a list of the standard chunk types, as
* appropriate for the chunks we find in the file attachment.
*/
private AttachmentChunks identifyAttachmentChunks(Map attachmentMap) {
return AttachmentChunks.getInstance(this.isNewChunkVersion(attachmentMap));
}
/**
* Return chunk version of the map in parameter
*/
private boolean isNewChunkVersion(Map map) {
// Are they of the old or new type of strings?
boolean hasOldStrings = false;
boolean hasNewStrings = false;
String oldStringEnd = Types.asFileEnding(Types.OLD_STRING);
String newStringEnd = Types.asFileEnding(Types.NEW_STRING);
for(Iterator i = map.keySet().iterator(); i.hasNext();) {
String entry = (String)i.next();
if(entry.endsWith( oldStringEnd )) {
hasOldStrings = true;
}
if(entry.endsWith( newStringEnd )) {
hasNewStrings = true;
}
}
if(hasOldStrings && hasNewStrings) {
throw new IllegalStateException("Your file contains string chunks of both the old and new types. Giving up");
} else if(hasNewStrings) {
return true;
}
return false;
}
/**
* Pull the chunk data that's stored in this object's hashmap out and return it as a HashMap.
* @param entryName
*/
public Object getChunk(HashMap dirMap, String entryName) {
if(dirMap == null) {
return null;
}
return dirMap.get(entryName);
}
/**
* Pull a directory/hashmap out of this hashmap and return it
* @param directoryName
* @return HashMap containing the chunks stored in the named directoryChunk
* @throws DirectoryChunkNotFoundException This is thrown should the directoryMap HashMap on this object be null
* or for some reason the directory is not found, is equal to null, or is for some reason not a HashMap/aka Directory Node.
*/
public HashMap getDirectoryChunk(String directoryName) throws DirectoryChunkNotFoundException {
DirectoryChunkNotFoundException excep = new DirectoryChunkNotFoundException(directoryName);
Object obj = getChunk(this.directoryMap, directoryName);
if(obj == null || !(obj instanceof HashMap)) throw excep;
return (HashMap)obj;
}
/**
* Pulls a ByteArrayOutputStream from this objects HashMap, this can be used to read a byte array of the contents of the given chunk.
* @param dirNode
* @param chunk
* @throws ChunkNotFoundException
*/
public Chunk getDocumentNode(HashMap dirNode, Chunk chunk) throws ChunkNotFoundException {
String entryName = chunk.getEntryName();
ChunkNotFoundException excep = new ChunkNotFoundException(entryName);
Object obj = getChunk(dirNode, entryName);
if(obj == null || !(obj instanceof ByteArrayOutputStream)) throw excep;
chunk.setValue((ByteArrayOutputStream)obj);
return chunk;
}
/**
* Pulls a Chunk out of this objects root Node tree.
* @param chunk
* @throws ChunkNotFoundException
*/
public Chunk getDocumentNode(Chunk chunk) throws ChunkNotFoundException {
return getDocumentNode(this.directoryMap, chunk);
}
/**
 * Collects the attachments found in the root directory map.
 * <p>
 * Every root entry whose name is {@code AttachmentChunks.namePrefix} followed
 * by a decimal number is treated as an attachment directory. For each one the
 * long file name chunk and the data chunk are read. Entries with a non-numeric
 * suffix, entries that are not directories, and attachments missing either
 * chunk are reported on System.err and skipped.
 *
 * @return a map containing attachment name (String) and data (ByteArrayInputStream)
 */
public Map getAttachmentList() {
    Map attachments = new HashMap();
    for(Object key : directoryMap.keySet()) {
        String entry = (String)key;
        if(!entry.startsWith(AttachmentChunks.namePrefix)) {
            continue;
        }

        // Strip only the leading prefix. String.replace would also remove any
        // later occurrence of the prefix text and so could accept a bad name.
        String attachmentIdString = entry.substring(AttachmentChunks.namePrefix.length());
        try {
            // Parsed purely to check that the suffix is a number
            Integer.parseInt(attachmentIdString);
        } catch (NumberFormatException nfe) {
            System.err.println("Invalid attachment id");
            continue;
        }

        // Only directories can hold attachment chunks; guard the cast so a
        // stray non-directory entry is skipped instead of throwing ClassCastException
        Object candidate = directoryMap.get(entry);
        if(!(candidate instanceof HashMap)) {
            System.err.println("Invalid attachment chunk");
            continue;
        }
        HashMap attachmentChunkMap = (HashMap)candidate;

        AttachmentChunks attachmentChunks = this.identifyAttachmentChunks(attachmentChunkMap);
        try {
            Chunk fileName = this.getDocumentNode(attachmentChunkMap, attachmentChunks.attachLongFileName);
            Chunk content = this.getDocumentNode(attachmentChunkMap, attachmentChunks.attachData);
            attachments.put(fileName.toString(), new ByteArrayInputStream(content.getValueByteArray().toByteArray()));
        } catch (ChunkNotFoundException e) {
            System.err.println("Invalid attachment chunk");
        }
    }
    return attachments;
}
/**
 * Walks the entries handed out by a POIFS call such as
 * getRoot().getEntries() and builds a map for that directory level.
 * Documents are passed to processDocumentNode together with the map,
 * sub-directories are processed recursively and stored under their name.
 *
 * @param iter iterator over the entries of one directory
 * @return the map built for this directory level
 * @throws IOException passed on from processDocumentNode or from the recursion
 */
private HashMap<String, HashMap<?,?>> processPOIIterator(Iterator<Entry> iter) throws IOException {
    HashMap<String, HashMap<?,?>> result = new HashMap<String, HashMap<?,?>>();

    while(iter.hasNext()) {
        Entry current = iter.next();

        if(current instanceof DocumentNode) {
            this.processDocumentNode((DocumentNode)current, result);
            continue;
        }

        if(current instanceof DirectoryNode) {
            DirectoryNode subDirectory = (DirectoryNode)current;
            String subDirectoryName = subDirectory.getName();
            // Descend into the sub-directory and keep its map under its name
            result.put(subDirectoryName, processPOIIterator(subDirectory.getEntries()));
            continue;
        }

        // A directory property chunk is deliberately left alone,
        //  anything else is reported
        if(!(current instanceof DirectoryProperty)) {
            System.err.println("Unknown node: " + current.toString());
        }
    }

    return result;
}
/**
 * Reads the content of a document node into a ByteArrayOutputStream and
 * stores that stream in the given directory map, keyed by the node's name.
 *
 * @param obj the document node to read
 * @param currentObj the directory map that receives the content
 * @throws java.io.IOException passed on from the block writing calls
 */
private void processDocumentNode(DocumentNode obj, HashMap currentObj) throws IOException {
    String name = obj.getName();

    // Get an iterator over the viewable parts, whichever form the node prefers
    Iterator views;
    if(obj.preferArray()) {
        Object[] viewArray = obj.getViewableArray();
        ArrayList collected = new ArrayList(viewArray.length);
        for(Object element : viewArray) {
            collected.add(element);
        }
        views = collected.iterator();
    } else {
        views = obj.getViewableIterator();
    }

    while(views.hasNext()) {
        Object view = views.next();

        if(view instanceof DocumentProperty) {
            // the properties are of no interest here
            continue;
        }
        if(!(view instanceof POIFSDocument)) {
            System.err.println("Unknown View Type: " + view.toString());
            continue;
        }

        POIFSDocument document = (POIFSDocument)view;
        // decide between copying the small blocks and writing the document's blocks
        int blockCount = document.countBlocks();
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        if(blockCount <= 0) {
            for(BlockWritable smallBlock : document.getSmallBlocks()) {
                smallBlock.writeBlocks(buffer);
            }
        } else {
            document.writeBlocks(buffer);
        }
        currentObj.put(name, buffer);
    }
}
/* private fields */
private static final long serialVersionUID = 1L;
private POIFSFileSystem fs;
// Root-level map in which getDirectoryChunk, getDocumentNode(Chunk) and
//  getAttachmentList look their entries up
private HashMap directoryMap;
// Find our top level children
// Note - we don't handle children of children yet, as
//  there doesn't seem to be any use of that in Outlook
for(Entry entry : node) {
    if(entry instanceof DirectoryNode) {
        // Work on the child directory itself. Casting "node" here would
        //  hand the parent directory to processChunks for every child.
        DirectoryNode dir = (DirectoryNode)entry;
        ChunkGroup group = null;

        // Do we know what to do with it?
        if(dir.getName().startsWith(AttachmentChunks.PREFIX)) {
            group = new AttachmentChunks();
        }
        if(dir.getName().startsWith(NameIdChunks.PREFIX)) {
            group = new NameIdChunks();
        }
        if(dir.getName().startsWith(RecipientChunks.PREFIX)) {
            // Recipient directories get their own group type, not NameIdChunks
            group = new RecipientChunks();
        }

        if(group != null) {
            processChunks(dir, group);
            groups.add(group);
        } else {
            // Unknown directory, skip silently
        }
    }
}
// Now do the top level chunks
processChunks(node, mainChunks);
// Finish
return groups.toArray(new ChunkGroup[groups.size()]);
}
/**
 * Hands every document found directly inside the given directory
 *  to process, together with the supplied group. Entries that are
 *  not documents are ignored, so nothing is descended into.
 */
protected static void processChunks(DirectoryNode node, ChunkGroup grouping) {
    for(Entry child : node) {
        if(!(child instanceof DocumentNode)) {
            continue;
        }
        process((DocumentNode)child, grouping);
    }
}
/**
 * Creates a chunk for the given document, and gives it to its parent group.
 * <p>
 * The last four characters of the document name are read as a hexadecimal
 *  type id. Only binary, ASCII string and unicode string types produce a
 *  chunk; documents with a name that is too short, has no underscore or
 *  does not end in a hex number are ignored, as are all other types.
 * A chunk whose value can't be read is reported on System.err and
 *  not recorded.
 *
 * @param entry the document to turn into a chunk
 * @param grouping the group which receives the chunk once it has been read
 */
protected static void process(DocumentNode entry, ChunkGroup grouping) {
    String entryName = entry.getName();

    if(entryName.length() < 9) {
        // Name in the wrong format
        return;
    }
    if(entryName.indexOf('_') == -1) {
        // Name in the wrong format
        return;
    }

    // See if we can get a type for it
    String ending = entryName.substring(entryName.length()-4);
    try {
        int type = Integer.parseInt(ending, 16);
        Chunk chunk = null;

        // NOTE(review): the catch below is deliberately kept around the chunk
        //  creation too - presumably the chunk constructors parse the name
        //  as well and may reject it in the same way; confirm before narrowing
        switch(type) {
        case Types.BINARY:
            chunk = new ByteChunk(entryName);
            break;
        case Types.ASCII_STRING:
        case Types.UNICODE_STRING:
            chunk = new StringChunk(entryName);
            break;
        }

        if(chunk != null) {
            try {
                DocumentInputStream inp = new DocumentInputStream(entry);
                try {
                    chunk.readValue(inp);
                } finally {
                    // Always release the stream, even when the read fails
                    inp.close();
                }
                grouping.record(chunk);
            } catch(IOException e) {
                System.err.println("Error reading from part " + entryName + " - " + e.toString());
            }
        }
    } catch(NumberFormatException e) {
        // Name in the wrong format
        return;
    }
}
}

View File

@ -20,6 +20,7 @@ package org.apache.poi.hsmf.model;
import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.datatypes.Types;
import junit.framework.TestCase;
@ -31,47 +32,54 @@ import junit.framework.TestCase;
*
*/
// NOTE(review): this listing appears to interleave the lines from before and
//  after the refactoring (e.g. "chunk" is declared twice in testChunkCreate, and
//  the boolean-argument constructor calls sit next to the type-argument ones) -
//  confirm which lines belong to the final file
public final class TestChunkData extends TestCase {
private Chunks chunks = Chunks.getInstance(false);
// Checks the entry name, chunk id and type reported by StringChunk instances
public void testChunkCreate() {
StringChunk chunk = new StringChunk(0x0200, false);
TestCase.assertEquals("__substg1.0_0200001E", chunk.getEntryName());
Chunk chunk;
// Built from a chunk id and a type: the expected entry name is "__substg1.0_"
//  followed by the id and then the type, each as four hex digits
chunk = new StringChunk(0x0200, 0x001E);
assertEquals("__substg1.0_0200001E", chunk.getEntryName());
assertEquals(0x0200, chunk.getChunkId());
assertEquals(0x001E, chunk.getType());
// Built from an entry name: the same id and type are expected back
chunk = new StringChunk("__substg1.0_0200001E");
assertEquals("__substg1.0_0200001E", chunk.getEntryName());
assertEquals(0x0200, chunk.getChunkId());
assertEquals(0x001E, chunk.getType());
/* test the lower and upper limits of the chunk ids */
chunk = new StringChunk(0x0000, false);
TestCase.assertEquals("__substg1.0_0000001E", chunk.getEntryName());
chunk = new StringChunk(0x0000, 0x001E);
assertEquals("__substg1.0_0000001E", chunk.getEntryName());
chunk = new StringChunk(0xFFFF, false);
TestCase.assertEquals("__substg1.0_FFFF001E", chunk.getEntryName());
chunk = new StringChunk(0xFFFF, 0x001E);
assertEquals("__substg1.0_FFFF001E", chunk.getEntryName());
chunk = new StringChunk(0xFFFF, true);
TestCase.assertEquals("__substg1.0_FFFF001F", chunk.getEntryName());
chunk = new StringChunk(0xFFFF, 0x001F);
assertEquals("__substg1.0_FFFF001F", chunk.getEntryName());
}
// Chunk id 0x1000 is expected to match Chunks.TEXT_BODY
public void testTextBodyChunk() {
StringChunk chunk = new StringChunk(0x1000, false);
TestCase.assertEquals(chunk.getEntryName(), chunks.textBodyChunk.getEntryName());
StringChunk chunk = new StringChunk(0x1000, Types.UNICODE_STRING);
assertEquals(chunk.getChunkId(), Chunks.TEXT_BODY);
}
// Chunk id 0x0E04 is expected to match Chunks.DISPLAY_TO
public void testDisplayToChunk() {
StringChunk chunk = new StringChunk(0x0E04, false);
TestCase.assertEquals(chunk.getEntryName(), chunks.displayToChunk.getEntryName());
StringChunk chunk = new StringChunk(0x0E04, Types.UNICODE_STRING);
assertEquals(chunk.getChunkId(), Chunks.DISPLAY_TO);
}
// Chunk id 0x0E03 is expected to match Chunks.DISPLAY_CC
public void testDisplayCCChunk() {
StringChunk chunk = new StringChunk(0x0E03, false);
TestCase.assertEquals(chunk.getEntryName(), chunks.displayCCChunk.getEntryName());
StringChunk chunk = new StringChunk(0x0E03, Types.UNICODE_STRING);
assertEquals(chunk.getChunkId(), Chunks.DISPLAY_CC);
}
// Chunk id 0x0E02 is expected to match Chunks.DISPLAY_BCC
public void testDisplayBCCChunk() {
StringChunk chunk = new StringChunk(0x0E02, false);
TestCase.assertEquals(chunk.getEntryName(), chunks.displayBCCChunk.getEntryName());
StringChunk chunk = new StringChunk(0x0E02, Types.UNICODE_STRING);
assertEquals(chunk.getChunkId(), Chunks.DISPLAY_BCC);
}
// Chunk id 0x0037 is expected to match Chunks.SUBJECT
public void testSubjectChunk() {
Chunk chunk = new StringChunk(0x0037, false);
TestCase.assertEquals(chunk.getEntryName(), chunks.subjectChunk.getEntryName());
Chunk chunk = new StringChunk(0x0037, Types.UNICODE_STRING);
assertEquals(chunk.getChunkId(), Chunks.SUBJECT);
}
}

View File

@ -26,6 +26,7 @@ import java.util.Map;
import junit.framework.TestCase;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.POIDataSamples;
@ -55,8 +56,8 @@ public class TestFileWithAttachmentsRead extends TestCase {
*/
// public void testReadDisplayCC() throws ChunkNotFoundException {
public void testRetrieveAttachments() {
Map attachmentsMap = mapiMessage.getAttachmentFiles();
int obtained = attachmentsMap.size();
AttachmentChunks[] attachments = mapiMessage.getAttachmentFiles();
int obtained = attachments.length;
int expected = 2;
TestCase.assertEquals(obtained, expected);
@ -69,19 +70,16 @@ public class TestFileWithAttachmentsRead extends TestCase {
*
*/
public void testReadAttachments() throws IOException {
// NOTE(review): the Map based lines and the AttachmentChunks[] based lines
//  below look like the before and after of the same statements in a diff
//  (the first for loop is never closed) - confirm which belong to the final file
Map attachmentsMap = mapiMessage.getAttachmentFiles();
AttachmentChunks[] attachments = mapiMessage.getAttachmentFiles();
for (Iterator iterator = attachmentsMap.keySet().iterator(); iterator.hasNext();) {
String fileName = (String) iterator.next();
ByteArrayInputStream fileStream = (ByteArrayInputStream) attachmentsMap.get(fileName);
// Drain the stream one byte at a time into a buffer
ByteArrayOutputStream fileContent = new ByteArrayOutputStream();
while (fileStream.available() > 0) {
fileContent.write(fileStream.read());
}
// The content only has to be non-blank once decoded as UTF-8
String obtained = new String(fileContent.toByteArray(), "UTF-8");
assertTrue(obtained.trim().length() > 0);
// Every attachment must have a non-empty file name, long file name,
//  extension and mime tag
for (AttachmentChunks attachment : attachments) {
assertTrue(attachment.attachFileName.getValue().length() > 0);
assertTrue(attachment.attachLongFileName.getValue().length() > 0);
assertTrue(attachment.attachExtension.getValue().length() > 0);
assertTrue(attachment.attachMimeTag.getValue().length() > 0);
}
// TODO better checking
}
}

View File

@ -0,0 +1,46 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf.model;
import org.apache.poi.hsmf.datatypes.Types;
import junit.framework.TestCase;
/**
 * Tests for the Types class. Pins the numeric ids of the
 *  known types and the way a type id is formatted as a
 *  file ending, so that a change to either is noticed
 *  before it breaks the library.
 */
public final class TestTypes extends TestCase {
    /** Each type constant must keep its expected numeric id */
    public void testTypeIds() {
        // Pairs of { expected id, constant under test }
        final int[][] expectedAndActual = {
            { 0x1e, Types.ASCII_STRING },
            { 0x1f, Types.UNICODE_STRING },
            { 0x0102, Types.BINARY },
            { 0x000B, Types.BOOLEAN },
            { 0x0003, Types.LONG },
            { 0x0040, Types.TIME },
        };
        for(int[] pair : expectedAndActual) {
            assertEquals(pair[0], pair[1]);
        }
    }

    /** A type id is formatted as four upper case hex digits */
    public void testTypeFormatting() {
        final int[] typeIds = { 0x0000, 0x0020, 0x0102, 0xfedc };
        final String[] endings = { "0000", "0020", "0102", "FEDC" };
        for(int i = 0; i < typeIds.length; i++) {
            assertEquals(endings[i], Types.asFileEnding(typeIds[i]));
        }
    }
}