diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index a43864184..fff8705ae 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -36,7 +36,8 @@ - + + 45804 - Update HSMF to handle Outlook 3.0 msg files, which have a different string chunk type Expose the name of Named Cell Styles via HSSFCellStyle (normally held on the parent style though) 45978 - Fixed IOOBE in Ref3DPtg.toFormulaString() due eager initialisation of SheetReferences Made HSSFFormulaEvaluator no longer require initialisation with sheet or row diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 965ceeefa..e8d18cee7 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,7 +33,8 @@ - + + 45804 - Update HSMF to handle Outlook 3.0 msg files, which have a different string chunk type Expose the name of Named Cell Styles via HSSFCellStyle (normally held on the parent style though) 45978 - Fixed IOOBE in Ref3DPtg.toFormulaString() due eager initialisation of SheetReferences Made HSSFFormulaEvaluator no longer require initialisation with sheet or row diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java index eb915160b..3669a5333 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java @@ -37,6 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; public class MAPIMessage { private POIFSChunkParser chunkParser; private POIFSFileSystem fs; + /** Chunk definitions used by the getters; assigned in MAPIMessage(InputStream) from chunkParser.identifyChunks() */ private Chunks chunks; /** * Constructor for creating new files.
@@ -64,6 +65,10 @@ public class MAPIMessage { public MAPIMessage(InputStream in) throws IOException { this.fs = new POIFSFileSystem(in); chunkParser = new POIFSChunkParser(this.fs); + + // Work out whether this file uses the old or new string + // chunk type, and keep the matching chunk definitions + chunks = chunkParser.identifyChunks(); } @@ -87,7 +92,7 @@ public class MAPIMessage { * @throws ChunkNotFoundException */ public String getTextBody() throws IOException, ChunkNotFoundException { - return getStringFromChunk(Chunks.getInstance().textBodyChunk); + return getStringFromChunk(chunks.textBodyChunk); } /** @@ -96,7 +101,7 @@ public class MAPIMessage { * @throws ChunkNotFoundException */ public String getSubject() throws ChunkNotFoundException { - return getStringFromChunk(Chunks.getInstance().subjectChunk); + return getStringFromChunk(chunks.subjectChunk); } @@ -107,7 +112,7 @@ public class MAPIMessage { * @throws ChunkNotFoundException */ public String getDisplayTo() throws ChunkNotFoundException { - return getStringFromChunk(Chunks.getInstance().displayToChunk); + return getStringFromChunk(chunks.displayToChunk); } /** @@ -117,7 +122,7 @@ public class MAPIMessage { * @throws ChunkNotFoundException */ public String getDisplayFrom() throws ChunkNotFoundException { - return getStringFromChunk(Chunks.getInstance().displayFromChunk); + return getStringFromChunk(chunks.displayFromChunk); } /** @@ -127,7 +132,7 @@ public class MAPIMessage { * @throws ChunkNotFoundException */ public String getDisplayCC() throws ChunkNotFoundException { - return getStringFromChunk(Chunks.getInstance().displayCCChunk); + return getStringFromChunk(chunks.displayCCChunk); } /** @@ -137,7 +142,7 @@ public class MAPIMessage { * @throws ChunkNotFoundException */ public String getDisplayBCC() throws ChunkNotFoundException { - return getStringFromChunk(Chunks.getInstance().displayBCCChunk); + return getStringFromChunk(chunks.displayBCCChunk); } @@ -148,7 +153,7 @@ public class MAPIMessage { * @throws ChunkNotFoundException */ public
String getConversationTopic() throws ChunkNotFoundException { - return getStringFromChunk(Chunks.getInstance().conversationTopic); + return getStringFromChunk(chunks.conversationTopic); } /** @@ -160,6 +165,6 @@ public class MAPIMessage { * @throws ChunkNotFoundException */ public String getMessageClass() throws ChunkNotFoundException { - return getStringFromChunk(Chunks.getInstance().messageClass); + return getStringFromChunk(chunks.messageClass); } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java index 6a3936d96..af77badb3 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java @@ -25,17 +25,39 @@ package org.apache.poi.hsmf.datatypes; */ public class Chunks { /* String parts of Outlook Messages that are currently known */ - public StringChunk messageClass = new StringChunk(0x001A); //Type of message that the MSG represents (ie. IPM.Note) - public StringChunk textBodyChunk = new StringChunk(0x1000); //BODY Chunk, for plain/text messages - public StringChunk subjectChunk = new StringChunk(0x0037); //Subject link chunk, in plain/text - public StringChunk displayToChunk = new StringChunk(0x0E04); //Value that is in the TO field (not actually the addresses as they are stored in recip directory nodes - public StringChunk displayFromChunk = new StringChunk(0x0C1A); //Value that is in the FROM field - public StringChunk displayCCChunk = new StringChunk(0x0E03); //value that shows in the CC field - public StringChunk displayBCCChunk = new StringChunk(0x0E02); //Value that shows in the BCC field - public StringChunk conversationTopic = new StringChunk(0x0070); //Sort of like the subject line, but without the RE: and FWD: parts. - public StringChunk sentByServerType = new StringChunk(0x0075); //Type of server that the message originated from (SMTP, etc).
+ + /** Type of message that the MSG represents (e.g. IPM.Note) */ + public StringChunk messageClass; + /** BODY Chunk, for plain/text messages */ + public StringChunk textBodyChunk; + /** Subject link chunk, in plain/text */ + public StringChunk subjectChunk; + /** Value that is in the TO field (not the actual addresses, as those are stored in recip directory nodes) */ + public StringChunk displayToChunk; + /** Value that is in the FROM field */ + public StringChunk displayFromChunk; + /** Value that shows in the CC field */ + public StringChunk displayCCChunk; + /** Value that shows in the BCC field */ + public StringChunk displayBCCChunk; + /** Sort of like the subject line, but without the RE: and FWD: parts. */ + public StringChunk conversationTopic; + /** Type of server that the message originated from (SMTP, etc). */ + public StringChunk sentByServerType; - public static Chunks getInstance() { - return new Chunks(); + private Chunks(boolean newStringType) { + messageClass = new StringChunk(0x001A, newStringType); + textBodyChunk = new StringChunk(0x1000, newStringType); + subjectChunk = new StringChunk(0x0037, newStringType); + displayToChunk = new StringChunk(0x0E04, newStringType); + displayFromChunk = new StringChunk(0x0C1A, newStringType); + displayCCChunk = new StringChunk(0x0E03, newStringType); + displayBCCChunk = new StringChunk(0x0E02, newStringType); + conversationTopic = new StringChunk(0x0070, newStringType); + sentByServerType = new StringChunk(0x0075, newStringType); + } + /** Creates a new set of the known chunks, using the new string type if newStringType is true, otherwise the old one */ + public static Chunks getInstance(boolean newStringType) { + return new Chunks(newStringType); } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/StringChunk.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/StringChunk.java index 2058b8ac5..fe90bd688 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/StringChunk.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/StringChunk.java @@ -27,9 +27,26 @@ public class StringChunk extends Chunk
{ private String value; - public StringChunk(int chunkId) { + /** + * Creates a String Chunk, using the new string type if + * newStyleString is true, or the old string type otherwise. + */ + public StringChunk(int chunkId, boolean newStyleString) { + this(chunkId, getStringType(newStyleString)); + } + private static int getStringType(boolean newStyleString) { + if(newStyleString) + return Types.NEW_STRING; + return Types.OLD_STRING; + } + + /** + * Creates a String Chunk, with the specified + * type (such as Types.OLD_STRING or Types.NEW_STRING). + */ + public StringChunk(int chunkId, int type) { this.chunkId = chunkId; - this.type = Types.STRING; + this.type = type; } /* (non-Javadoc) diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java index 9297666af..f4d675a05 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Types.java @@ -19,8 +19,21 @@ package org.apache.poi.hsmf.datatypes; public class Types { public static int BINARY = 0x0102; - public static int STRING = 0x001E; + + /** A string, until Outlook 3.0 */ + public static int OLD_STRING = 0x001E; + /** A string, from Outlook 3.0 onwards */ + public static int NEW_STRING = 0x001F; + public static int LONG = 0x0003; public static int TIME = 0x0040; public static int BOOLEAN = 0x000B; + /** Returns the type as upper case hex, padded on the left with zeros to at least 4 characters */ + public static String asFileEnding(int type) { + String str = Integer.toHexString(type).toUpperCase(); + while(str.length() < 4) { + str = "0" + str; + } + return str; + } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java index bdfb29e2a..c1e174e08 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java @@ -24,6 +24,8 @@ import java.util.HashMap; import java.util.Iterator; import org.apache.poi.hsmf.datatypes.Chunk; +import
org.apache.poi.hsmf.datatypes.Chunks; +import org.apache.poi.hsmf.datatypes.Types; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.apache.poi.hsmf.exceptions.DirectoryChunkNotFoundException; import org.apache.poi.poifs.filesystem.DirectoryEntry; @@ -82,7 +84,36 @@ public class POIFSChunkParser { this.directoryMap = this.processPOIIterator(iter); } - + + /** + * Returns the standard chunk definitions, built for the string type (old or new) + * found in the file. The old type is used if neither is found; finding both throws an IllegalStateException. + */ + public Chunks identifyChunks() { + // Are they of the old or new type of strings? + boolean hasOldStrings = false; + boolean hasNewStrings = false; + String oldStringEnd = Types.asFileEnding(Types.OLD_STRING); + String newStringEnd = Types.asFileEnding(Types.NEW_STRING); + // Check the ending of each entry name for one of the two string types + for(Iterator i = directoryMap.keySet().iterator(); i.hasNext();) { + String entry = (String)i.next(); + if(entry.endsWith( oldStringEnd )) { + hasOldStrings = true; + } + if(entry.endsWith( newStringEnd )) { + hasNewStrings = true; + } + } + + if(hasOldStrings && hasNewStrings) { + throw new IllegalStateException("Your file contains string chunks of both the old and new types. Giving up"); + } else if(hasNewStrings) { + return Chunks.getInstance(true); + } + return Chunks.getInstance(false); + } + /** * Pull the chunk data that's stored in this object's hashmap out and return it as a HashMap.
* @param entryName diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/AllTests.java b/src/scratchpad/testcases/org/apache/poi/hsmf/AllTests.java index 18a622c32..e117ab89e 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/AllTests.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/AllTests.java @@ -33,6 +33,7 @@ public class AllTests TestSuite suite = new TestSuite(); suite.addTestSuite(org.apache.poi.hsmf.model.TestBlankFileRead.class); suite.addTestSuite(org.apache.poi.hsmf.model.TestSimpleFileRead.class); + suite.addTestSuite(org.apache.poi.hsmf.model.TestOutlook30FileRead.class); suite.addTestSuite(org.apache.poi.hsmf.model.TestChunkData.class); return suite; diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/data/outlook_30_msg.msg b/src/scratchpad/testcases/org/apache/poi/hsmf/data/outlook_30_msg.msg new file mode 100644 index 000000000..0a585699a Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hsmf/data/outlook_30_msg.msg differ diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestChunkData.java b/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestChunkData.java index dc4b53129..8f5dd4777 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestChunkData.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestChunkData.java @@ -31,42 +31,47 @@ import junit.framework.TestCase; * */ public class TestChunkData extends TestCase { + /** Chunk definitions using the old string type, to match the old-style StringChunks built in these tests */ private Chunks chunks = Chunks.getInstance(false); + public void testChunkCreate() { - StringChunk chunk = new StringChunk(0x0200); + StringChunk chunk = new StringChunk(0x0200, false); TestCase.assertEquals("__substg1.0_0200001E", chunk.getEntryName()); /* test the lower and upper limits of the chunk ids */ - chunk = new StringChunk(0x0000); + chunk = new StringChunk(0x0000, false); TestCase.assertEquals("__substg1.0_0000001E", chunk.getEntryName()); - chunk = new StringChunk(0xFFFF); + chunk = new StringChunk(0xFFFF, false);
TestCase.assertEquals("__substg1.0_FFFF001E", chunk.getEntryName()); + /* the new string type gives an entry name ending in 001F rather than 001E */ + chunk = new StringChunk(0xFFFF, true); + TestCase.assertEquals("__substg1.0_FFFF001F", chunk.getEntryName()); } public void testTextBodyChunk() { - StringChunk chunk = new StringChunk(0x1000); - TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().textBodyChunk.getEntryName()); + StringChunk chunk = new StringChunk(0x1000, false); + TestCase.assertEquals(chunk.getEntryName(), chunks.textBodyChunk.getEntryName()); } public void testDisplayToChunk() { - StringChunk chunk = new StringChunk(0x0E04); - TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().displayToChunk.getEntryName()); + StringChunk chunk = new StringChunk(0x0E04, false); + TestCase.assertEquals(chunk.getEntryName(), chunks.displayToChunk.getEntryName()); } public void testDisplayCCChunk() { - StringChunk chunk = new StringChunk(0x0E03); - TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().displayCCChunk.getEntryName()); + StringChunk chunk = new StringChunk(0x0E03, false); + TestCase.assertEquals(chunk.getEntryName(), chunks.displayCCChunk.getEntryName()); } public void testDisplayBCCChunk() { - StringChunk chunk = new StringChunk(0x0E02); - TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().displayBCCChunk.getEntryName()); + StringChunk chunk = new StringChunk(0x0E02, false); + TestCase.assertEquals(chunk.getEntryName(), chunks.displayBCCChunk.getEntryName()); } public void testSubjectChunk() { - Chunk chunk = new StringChunk(0x0037); - TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().subjectChunk.getEntryName()); + Chunk chunk = new StringChunk(0x0037, false); + TestCase.assertEquals(chunk.getEntryName(), chunks.subjectChunk.getEntryName()); } } diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestOutlook30FileRead.java b/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestOutlook30FileRead.java new file mode 100644 index 000000000..7e94405e0
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestOutlook30FileRead.java
@@ -0,0 +1,135 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.model;
+
+import java.io.IOException;
+
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests to verify that we can still work on the newer Outlook 3.0 files.
+ */
+public class TestOutlook30FileRead extends TestCase {
+    private MAPIMessage mapiMessage;
+
+    /**
+     * Initialize this test, load up the outlook_30_msg.msg mapi message.
+     * @throws IOException if loading the message fails
+     */
+    public TestOutlook30FileRead() throws IOException {
+        String dirname = System.getProperty("HSMF.testdata.path");
+        this.mapiMessage = new MAPIMessage(dirname + "/outlook_30_msg.msg");
+    }
+
+    /**
+     * Test to see if we can read the CC Chunk, which we expect to be empty.
+     * @throws ChunkNotFoundException
+     *
+     */
+    public void testReadDisplayCC() throws ChunkNotFoundException {
+        String obtained = mapiMessage.getDisplayCC();
+        String expected = "";
+
+        TestCase.assertEquals(expected, obtained);
+    }
+
+    /**
+     * Test to see if we can read the To Chunk.
+     * @throws ChunkNotFoundException
+     *
+     */
+    public void testReadDisplayTo() throws ChunkNotFoundException {
+        String obtained = mapiMessage.getDisplayTo();
+
+        assertTrue(obtained.startsWith("Bohn, Shawn"));
+    }
+
+    /**
+     * Test to see if we can read the From Chunk.
+     * @throws ChunkNotFoundException
+     *
+     */
+    public void testReadDisplayFrom() throws ChunkNotFoundException {
+        String obtained = mapiMessage.getDisplayFrom();
+        String expected = "Cramer, Nick";
+
+        TestCase.assertEquals(expected, obtained);
+    }
+
+    /**
+     * Test to see if we can read the BCC Chunk, which we expect to be empty.
+     * @throws ChunkNotFoundException
+     *
+     */
+    public void testReadDisplayBCC() throws ChunkNotFoundException {
+        String obtained = mapiMessage.getDisplayBCC();
+        String expected = "";
+
+        TestCase.assertEquals(expected, obtained);
+    }
+
+
+    /**
+     * Check if we can read the body of the message, we expect it to start with "I am shutting down".
+     *
+     * @throws Exception
+     */
+    public void testReadBody() throws Exception {
+        String obtained = mapiMessage.getTextBody();
+        assertTrue(obtained.startsWith("I am shutting down"));
+    }
+
+    /**
+     * Check if we can read the subject line of the message.
+     *
+     * @throws Exception
+     */
+    public void testReadSubject() throws Exception {
+        String obtained = mapiMessage.getSubject();
+        String expected = "IN-SPIRE servers going down for a bit, back up around 8am";
+
+        TestCase.assertEquals(expected, obtained);
+    }
+
+    /**
+     * Check if we can read the conversation topic, we expect the same text as the subject line.
+     *
+     * @throws Exception
+     */
+    public void testReadConversationTopic() throws Exception {
+        String obtained = mapiMessage.getConversationTopic();
+        TestCase.assertEquals("IN-SPIRE servers going down for a bit, back up around 8am", obtained);
+    }
+
+
+    /**
+     * Check if we can read the message class, we expect "IPM.Note".
+     *
+     * @throws Exception
+     */
+    public void testReadMessageClass() throws Exception {
+        String obtained = mapiMessage.getMessageClass();
+        TestCase.assertEquals("IPM.Note", obtained);
+    }
+
+
+
+}