From 0e368a23da9047b67aafaf1cb488adc49daba16f Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 8 Jan 2010 11:14:58 +0000 Subject: [PATCH] Fix some chunk types, fix the directory descent, fix the Msg2txt example, and start on fixing core tests git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@897167 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hsmf/examples/Msg2txt.java | 50 ++++------ .../org/apache/poi/hsmf/datatypes/Chunks.java | 4 +- .../poi/hsmf/datatypes/RecipientChunks.java | 4 +- .../poi/hsmf/datatypes/StringChunk.java | 4 +- .../poi/hsmf/parsers/POIFSChunkParser.java | 4 +- .../hsmf/parsers/TestPOIFSChunkParser.java | 86 +++++++++++++++++- test-data/hsmf/quick.msg | Bin 0 -> 17920 bytes 7 files changed, 108 insertions(+), 44 deletions(-) create mode 100644 test-data/hsmf/quick.msg diff --git a/src/examples/src/org/apache/poi/hsmf/examples/Msg2txt.java b/src/examples/src/org/apache/poi/hsmf/examples/Msg2txt.java index 546334bda..bf018a3e4 100644 --- a/src/examples/src/org/apache/poi/hsmf/examples/Msg2txt.java +++ b/src/examples/src/org/apache/poi/hsmf/examples/Msg2txt.java @@ -17,16 +17,14 @@ package org.apache.poi.hsmf.examples; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.PrintWriter; -import java.util.Iterator; -import java.util.Map; import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; /** @@ -35,7 +33,6 @@ import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; * attachments. * * @author Bruno Girin - * */ public class Msg2txt { @@ -105,17 +102,13 @@ public class Msg2txt { } catch (ChunkNotFoundException e) { System.err.println("No message body"); } - Map attachmentMap = msg.getAttachmentFiles(); - if(attachmentMap.size() > 0) { + + AttachmentChunks[] attachments = msg.getAttachmentFiles(); + if(attachments.length > 0) { File d = new File(attDirName); if(d.mkdir()) { - for( - Iterator ii = attachmentMap.entrySet().iterator(); - ii.hasNext(); - ) { - Map.Entry entry = (Map.Entry)ii.next(); - processAttachment(d, entry.getKey().toString(), - (ByteArrayInputStream)entry.getValue()); + for(AttachmentChunks attachment : attachments) { + processAttachment(attachment, d); } } else { System.err.println("Can't create directory "+attDirName); @@ -131,33 +124,26 @@ public class Msg2txt { /** * Processes a single attachment: reads it from the Outlook MSG file and * writes it to disk as an individual file. - * + * + * @param attachment the chunk group describing the attachment * @param dir the directory in which to write the attachment file - * @param fileName the name of the attachment file - * @param fileIn the input stream that contains the attachment's data * @throws IOException when any of the file operations fails */ - public void processAttachment(File dir, String fileName, - ByteArrayInputStream fileIn) throws IOException { + public void processAttachment(AttachmentChunks attachment, + File dir) throws IOException { + String fileName = attachment.attachFileName.toString(); + if(attachment.attachLongFileName != null) { + fileName = attachment.attachLongFileName.toString(); + } + File f = new File(dir, fileName); OutputStream fileOut = null; try { fileOut = new FileOutputStream(f); - byte[] buffer = new byte[2048]; - int bNum = fileIn.read(buffer); - while(bNum > 0) { - fileOut.write(buffer); - bNum = fileIn.read(buffer); - } + fileOut.write(attachment.attachData.getValue()); } finally { - try { - if(fileIn != null) { - fileIn.close(); - } - } finally { - if(fileOut != null) { - fileOut.close(); - } + if(fileOut != null) { + fileOut.close(); } } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java index fa9f9b598..9f68cc167 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java @@ -63,7 +63,7 @@ public final class Chunks implements ChunkGroup { /** Type of server that the message originated from (SMTP, etc). */ public StringChunk sentByServerType; /** TODO */ - public StringChunk dateChunk; + public ByteChunk dateChunk; /** TODO */ public StringChunk emailFromChunk; @@ -86,7 +86,7 @@ public final class Chunks implements ChunkGroup { subjectChunk = (StringChunk)chunk; break; case DATE: - dateChunk = (StringChunk)chunk; + dateChunk = (ByteChunk)chunk; break; case CONVERSATION_TOPIC: conversationTopic = (StringChunk)chunk; diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/RecipientChunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/RecipientChunks.java index 27a9ecca8..b5157bb2c 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/RecipientChunks.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/RecipientChunks.java @@ -32,7 +32,7 @@ public final class RecipientChunks implements ChunkGroup { public static final int RECIPIENT_EMAIL = 0x39FE; /** TODO */ - public StringChunk recipientSearchChunk; + public ByteChunk recipientSearchChunk; /** TODO */ public StringChunk recipientEmailChunk; @@ -53,7 +53,7 @@ public final class RecipientChunks implements ChunkGroup { public void record(Chunk chunk) { switch(chunk.getChunkId()) { case RECIPIENT_SEARCH: - recipientSearchChunk = (StringChunk)chunk; + recipientSearchChunk = (ByteChunk)chunk; break; case RECIPIENT_EMAIL: recipientEmailChunk = (StringChunk)chunk; diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/StringChunk.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/StringChunk.java index 91558ba5c..b1f859d04 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/StringChunk.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/StringChunk.java @@ -55,7 +55,7 @@ public class StringChunk extends Chunk { switch(type) { case Types.ASCII_STRING: try { - tmpValue = new String(data, "UTF-16LE"); + tmpValue = new String(data, "CP1252"); } catch (UnsupportedEncodingException e) { throw new RuntimeException("Core encoding not found, JVM broken?", e); } @@ -77,7 +77,7 @@ public class StringChunk extends Chunk { switch(type) { case Types.ASCII_STRING: try { - data = value.getBytes("UTF-16LE"); + data = value.getBytes("CP1252"); } catch (UnsupportedEncodingException e) { throw new RuntimeException("Core encoding not found, JVM broken?", e); } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java index adbe1a7ac..eb79ae9ad 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java @@ -55,7 +55,7 @@ public final class POIFSChunkParser { // there doesn't seem to be any use of that in Outlook for(Entry entry : node) { if(entry instanceof DirectoryNode) { - DirectoryNode dir = (DirectoryNode)node; + DirectoryNode dir = (DirectoryNode)entry; ChunkGroup group = null; // Do we know what to do with it? @@ -66,7 +66,7 @@ public final class POIFSChunkParser { group = new NameIdChunks(); } if(dir.getName().startsWith(RecipientChunks.PREFIX)) { - group = new NameIdChunks(); + group = new RecipientChunks(); } if(group != null) { diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/parsers/TestPOIFSChunkParser.java b/src/scratchpad/testcases/org/apache/poi/hsmf/parsers/TestPOIFSChunkParser.java index 06bc5f9e3..a86a4cec5 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/parsers/TestPOIFSChunkParser.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/parsers/TestPOIFSChunkParser.java @@ -18,8 +18,16 @@ package org.apache.poi.hsmf.parsers; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.IOException; +import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; +import org.apache.poi.hsmf.datatypes.ChunkGroup; +import org.apache.poi.hsmf.datatypes.Chunks; +import org.apache.poi.hsmf.datatypes.NameIdChunks; +import org.apache.poi.hsmf.datatypes.RecipientChunks; +import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.POIDataSamples; @@ -44,13 +52,83 @@ public final class TestPOIFSChunkParser extends TestCase { new FileInputStream(samples.getFile("attachment_test_msg.msg")) ); POIFSFileSystem without = new POIFSFileSystem( - new FileInputStream(samples.getFile("simple_test_msg.msg")) + new FileInputStream(samples.getFile("quick.msg")) ); + AttachmentChunks attachment; - // Check details on the one with - - // One with, from the top + + // Check raw details on the one with + with.getRoot().getEntry("__attach_version1.0_#00000000"); + with.getRoot().getEntry("__attach_version1.0_#00000001"); + POIFSChunkParser.parse(with.getRoot()); + + ChunkGroup[] groups = POIFSChunkParser.parse(with.getRoot()); + assertEquals(5, groups.length); + assertTrue(groups[0] instanceof Chunks); + assertTrue(groups[1] instanceof RecipientChunks); + assertTrue(groups[2] instanceof AttachmentChunks); + assertTrue(groups[3] instanceof AttachmentChunks); + assertTrue(groups[4] instanceof NameIdChunks); + + attachment = (AttachmentChunks)groups[2]; + assertEquals("TEST-U~1.DOC", attachment.attachFileName.toString()); + assertEquals("test-unicode.doc", attachment.attachLongFileName.toString()); + assertEquals(24064, attachment.attachData.getValue().length); + + attachment = (AttachmentChunks)groups[3]; + assertEquals("pj1.txt", attachment.attachFileName.toString()); + assertEquals("pj1.txt", attachment.attachLongFileName.toString()); + assertEquals(89, attachment.attachData.getValue().length); + + // Check raw details on one without + try { + without.getRoot().getEntry("__attach_version1.0_#00000000"); + fail(); + } catch(FileNotFoundException e) {} + try { + without.getRoot().getEntry("__attach_version1.0_#00000001"); + fail(); + } catch(FileNotFoundException e) {} + + + // One with, from the top + MAPIMessage msgWith = new MAPIMessage(with); + assertEquals(2, msgWith.getAttachmentFiles().length); + + attachment = msgWith.getAttachmentFiles()[0]; + assertEquals("TEST-U~1.DOC", attachment.attachFileName.toString()); + assertEquals("test-unicode.doc", attachment.attachLongFileName.toString()); + assertEquals(24064, attachment.attachData.getValue().length); + + attachment = msgWith.getAttachmentFiles()[1]; + assertEquals("pj1.txt", attachment.attachFileName.toString()); + assertEquals("pj1.txt", attachment.attachLongFileName.toString()); + assertEquals(89, attachment.attachData.getValue().length); + + // Plus check core details are there + try { + assertEquals("'nicolas1.23456@free.fr'", msgWith.getDisplayTo()); + assertEquals("Nicolas1 23456", msgWith.getDisplayFrom()); + assertEquals("test pi\u00e8ce jointe 1", msgWith.getSubject()); + } catch(ChunkNotFoundException e) { + fail(); + } + + // One without, from the top + MAPIMessage msgWithout = new MAPIMessage(without); + + // No attachments + assertEquals(0, msgWithout.getAttachmentFiles().length); + + // But has core details + try { + assertEquals("Kevin Roast", msgWithout.getDisplayTo()); + assertEquals("Kevin Roast", msgWithout.getDisplayFrom()); + assertEquals("Test the content transformer", msgWithout.getSubject()); + } catch(ChunkNotFoundException e) { + fail(); + } } } diff --git a/test-data/hsmf/quick.msg b/test-data/hsmf/quick.msg new file mode 100644 index 0000000000000000000000000000000000000000..05326d2c84a14e0cc5d715fad173326b8d453a1d GIT binary patch literal 17920 zcmeHPYiv`=6`p%9kAw>$bi0)BFsAHMmR*eP*a;zIv6DF91rpXVY?jjIg&`?m2ipYb z17%SwRqcRIRkFs;d0x&pv?dzp854a=8--|~XT&|6 zo}NauMNE%RXaQLuCQ|e-E8HR?k`$)JMN}o5R*S_mb^f&GtBFf{pB;be17ju9t#uL) zia{|!s44nQP?#0t6lN%PQpy>UQ&cja@Ca{-{Jqp7pP_iPQ<%L(lK+WnrWo&HCmQ%k zF)Ypy+PFB*##Nba6^5azdzXqV3a@NwlX_oyyzo`a-ogg8L# zrMO7`Ck4;jgemd2Q7l(~^x5UPTozIIKq){W%mQ03C9Wf`CtgNu6XTl|#H)$X=V=HK ztB5~E{1ox0iO~ku6Mu$y4RI4Os(KwUzS%&bi+Cfkn|Kp3;-Ap`Sm1H@e|v>9LtLYJ zCF*ek=Xa>>z?1$@<6pi_=DYu^R;R{+C;eYpk8)JPZx*dAeBdfVo;vbT0{94^%J`jx?;}QuOB4Jb_$u&EP=?Z} z3S&`E@my48{2mtmvy@jg`fsQ7XBha7_ow&&;Ge-apfmGlot+w1MGbm!v3 zSLNx?>i-6ORQRD>{O2ewPk)Smk6-`KvGwQq|5%5^7w77~g{?pO|Krym{be^Xs8ah2 zGTR^4lURRn_1{csIsHFKGe4hUaTt$?%mM~W9=U?D=bM%*e8T&~ngBF|;z*o?Zzre>HoOb+?xYg*7xmw@; zfDivui*H~&#k!rVKl}w<`@^j7<+Z=6^+(;mM+_=A{sF(4Isd?)*-FgSAO2#L7*q+r zhf2V)zp8sKKGte(fD@I&caT}@AMshr2RU8AM7t< zz6O=UhuxcH|JSzv8Emfpoc#x<{i76Gd;{lRZelJz`s#LKP&xgZsqgp9(fAA7>&9RB zei5es3xC=m=IRf7>HHrDpSAv~y8bct!e;=Lvp>#XocTvv|5f7yH{buP_54fqJ+A$~ zNci7Gg%P1I=otG8NTbew@QgK=`VQ+4^^Ef$Jj1>!4*rRHRw3qV^^EZg&(Gw=LB^P_ z(mbE{?8!ZQ^Wu;$Kyr|{g}9Y?Cvh7w&gI&PvF7X`evx<&afmof+)3O;+)W%I#-YVt zVuh^>V+X)1=6_9rIClkQk*IFr?;EtO#P)vv6d?!3I|rFVb!lRy4)=_+aq4Gk|G zE0wflT#cWLTFW0N)15sxK6=VMl1`mZx=*AoxW9Dn%tXeWnoOjnWls-(`J(%H>f{Ra zqhj*;JZQ$Vrazlon}56HXyn^JU)Q^PP2hXgW>l&EJ&}V4bouS>^tE>Sx_Wv7{+)im zFA(nB*%=OZcXxOBy{$f<-|vlhyS)BjBoyDbuiqaT=#2OVJY%U$HgPqEhWm7_ItAD%xAB(R1emJ}MYHY=gvHB_Fx^M?QL9eH+?Ykc1 zy3cD)9h;a|p&ebl9kEDP^iVX?8}Em-KXN46>xuP+`{SX`NN+A%b`Tk0 zCoP#UGO%l2Rj9T9La9IeIP|M)osDaM`d*!Qe#gzrxv5cn5{i8i7$qw%7g=llFZA*o zAG|d-9KLlcb?NN~(Uv#A_5RnV_S}HO!lX*$Cnt-AZom2N|MQJM+HttQeYnHjKGCsH zPHO$RNv-{KN8XIL-P`JI@ddnX0e>)fbofDs=PvCsn;Yy-N6Wnx&qk}_gCUQs9PMPJIr^aj5yp%b%{j{ zc*Yr%MW>7|VM!S{*vD`Qvp}|KXBYchsvH*WG~~-Jx(x{#dyx3vrn@DO6{Kdcm8nY=E z|1hP+86c><{ljwilhgiytH{Z<@-KAPwZEY3eXR3Wc{>XGUMNFlWan)^8=rOmL*iDW z|I380>;Fn~j{O6aANB{i`lIb*{}NQL{=GBy*WQ1rTK_)6#U4DUJU%p^oBJmx3Aa(A z>+mrOX!SR+*NS~suKh9D?_=TP{7H|m+8!4l`>Hwkvi5h*fAFiv$38Rmmbv)Yq|w>m z!N>W#9zT@B=Hefu^f-eAm1}>F=DhZYy}J%#ENpGdu{u|N90!l%cYNnvyCUo&7)1`8SA zMJooZ7jU+R^@C}I3)TlNIvp!~-<9|NKemfjO@;6GN*Uv!v)*`F%Fg=Zu5B8zJo&zD zi&vzq)=Rb+kTQKP$wjAP$RA|`yXdYyWcUz{JRxPZ{xOZSQpWkRCGJxB&J}$>S@iw+ zqW2dH-kV0#a^S&Vaq?R$c{8N}Sz@!4e{;`~Z80KcN5cDyQdaqyMtvjl$Nd^h+@|sbrHnDhDbtjcRbO$5 zkEOgR_udwFq)ehDTih*@?-j_VVS9mx@!S$0*_Qaz9?0L%okUO>q+FkKcr5Wi%72-W zn?gvh$P#Od3QA6PeMJCzZw$i$(Lje_Q+cQ=S1v5=oZQd5LO0f2_6sP?WiA zzkJ(R;%Vt8sFB1LD+}fU`rkNKVw6=QE&bKMlpVk1-!#q}avoOm0_{sn86ygq_N}F? z{17#7JAMo1ZPBLkrsS0Vp_H3uzIX4!dzHT}LQ+OrkR{F*$$uTp&(9KnFOokhlJ6GC zE^$xF3ePm|OIi70y)aW)bT5STmG(f{TkaiwAK`3B ojo)e>Q1giz&(-{-<~cQAs(E*|^NuQOo!hbUH7BA+#dE{|0vc+4z5oCK literal 0 HcmV?d00001