From 0e2e61d1e127499143a44dd550ac116adc6a2d4d Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 5 Aug 2015 15:58:43 +0000 Subject: [PATCH] NPE fix for text extraction from MSG files with only a short name git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1694255 13f79535-47bb-0310-9956-ffa450edef68 --- .../hsmf/extractor/OutlookTextExtactor.java | 11 ++++++++--- .../extractor/TestOutlookTextExtractor.java | 19 +++++++++++++++++++ src/testcases/org/apache/poi/POITestCase.java | 6 ++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java index 4ad8b37c1..4cdaf16f8 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java @@ -25,6 +25,7 @@ import java.util.TimeZone; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hsmf.MAPIMessage; import org.apache.poi.hsmf.datatypes.AttachmentChunks; +import org.apache.poi.hsmf.datatypes.StringChunk; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; @@ -65,6 +66,7 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor { new NPOIFSFileSystem(new File(filename)) ); System.out.println( extractor.getText() ); + extractor.close(); } } @@ -146,12 +148,15 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor { // Display attachment names // To get the attachments, use ExtractorFactory for(AttachmentChunks att : msg.getAttachmentFiles()) { - String ats = att.attachLongFileName.getValue(); + StringChunk name = att.attachLongFileName; + if (name == null) name = att.attachFileName; + String attName = name.getValue(); + if(att.attachMimeTag != null && att.attachMimeTag.getValue() != null) { - ats = att.attachMimeTag.getValue() + " = " + ats; + attName = att.attachMimeTag.getValue() + " = " + attName; } - s.append("Attachment: " + ats + "\n"); + s.append("Attachment: " + attName + "\n"); } try { diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java index adbb966a8..5550adbb6 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java @@ -209,6 +209,25 @@ public final class TestOutlookTextExtractor extends POITestCase { ext.close(); } + public void testWithAttachedMessage() throws Exception { + POIFSFileSystem simple = new POIFSFileSystem( + new FileInputStream(samples.getFile("58214_with_attachment.msg")) + ); + MAPIMessage msg = new MAPIMessage(simple); + OutlookTextExtactor ext = new OutlookTextExtactor(msg); + String text = ext.getText(); + + // Check we got bits from the main message + assertContains(text, "Master mail"); + assertContains(text, "ante in lacinia euismod"); + + // But not the attached message + assertNotContained(text, "Test mail attachment"); + assertNotContained(text, "Lorem ipsum dolor sit"); + + ext.close(); + } + public void testEncodings() throws Exception { POIFSFileSystem simple = new POIFSFileSystem( new FileInputStream(samples.getFile("chinese-traditional.msg")) diff --git a/src/testcases/org/apache/poi/POITestCase.java b/src/testcases/org/apache/poi/POITestCase.java index e38e46d4f..3d0cea51d 100644 --- a/src/testcases/org/apache/poi/POITestCase.java +++ b/src/testcases/org/apache/poi/POITestCase.java @@ -33,6 +33,12 @@ public class POITestCase extends TestCase { haystack.contains(needle) ); } + public static void assertNotContained(String haystack, String needle) { + assertFalse( + "Unexpectedly found text '" + needle + "' in text:\n" + haystack, + haystack.contains(needle) + ); + } public static void assertEquals(T[] expected, T[] actual) {