From 8dcf35452d5d4e1655d24158b6b96c7a1e741967 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 4 Aug 2010 17:22:15 +0000 Subject: [PATCH] Support nested outlook files in ExtractorFactory git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982334 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/extractor/ExtractorFactory.java | 7 ++-- .../poi/extractor/TestExtractorFactory.java | 34 +++++++++++++++++-- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 4864714f4..52912848e 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -50,7 +50,6 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.xslf.XSLFSlideShow; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; @@ -289,8 +288,10 @@ public class ExtractorFactory { MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage(); for(AttachmentChunks attachment : msg.getAttachmentFiles()) { if(attachment.attachData != null) { - byte[] data = attachment.attachData.getValue(); - nonPOIFS.add( new ByteArrayInputStream(data) ); + byte[] data = attachment.attachData.getValue(); + nonPOIFS.add( new ByteArrayInputStream(data) ); + } else if(attachment.attachmentDirectory != null) { + dirs.add(attachment.attachmentDirectory.getDirectory()); } } } diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index 0e4edeef9..4def3d326 100644 --- 
a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -66,6 +66,7 @@ public class TestExtractorFactory extends TestCase { private File msg; private File msgEmb; + private File msgEmbMsg; private File vsd; @@ -102,6 +103,7 @@ public class TestExtractorFactory extends TestCase { POIDataSamples olTests = POIDataSamples.getHSMFInstance(); msg = olTests.getFile("quick.msg"); msgEmb = olTests.getFile("attachment_test_msg.msg"); + msgEmbMsg = olTests.getFile("attachment_msg_pdf.msg"); } public void testFile() throws Exception { @@ -534,51 +536,77 @@ public class TestExtractorFactory extends TestCase { embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext); assertEquals(6, embeds.length); - int numWord = 0, numXls = 0, numPpt = 0; + int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0; for(int i=0; i<embeds.length; i++) { assertTrue(embeds[i].getText().length() > 20); if(embeds[i] instanceof PowerPointExtractor) numPpt++; else if(embeds[i] instanceof ExcelExtractor) numXls++; else if(embeds[i] instanceof WordExtractor) numWord++; + else if(embeds[i] instanceof OutlookTextExtactor) numMsg++; } assertEquals(2, numPpt); assertEquals(2, numXls); assertEquals(2, numWord); + assertEquals(0, numMsg); // Word ext = (POIOLE2TextExtractor) ExtractorFactory.createExtractor(docEmb); embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext); - numWord = 0; numXls = 0; numPpt = 0; + numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; assertEquals(4, embeds.length); for(int i=0; i<embeds.length; i++) { assertTrue(embeds[i].getText().length() > 20); if(embeds[i] instanceof PowerPointExtractor) numPpt++; else if(embeds[i] instanceof ExcelExtractor) numXls++; else if(embeds[i] instanceof WordExtractor) numWord++; + else if(embeds[i] instanceof OutlookTextExtactor) numMsg++; } assertEquals(1, numPpt); assertEquals(2, numXls); assertEquals(1, numWord); + assertEquals(0, numMsg); // Outlook ext = (OutlookTextExtactor) ExtractorFactory.createExtractor(msgEmb); embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext); 
- numWord = 0; numXls = 0; numPpt = 0; + numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; assertEquals(1, embeds.length); for(int i=0; i<embeds.length; i++) { assertTrue(embeds[i].getText().length() > 20); if(embeds[i] instanceof PowerPointExtractor) numPpt++; else if(embeds[i] instanceof ExcelExtractor) numXls++; else if(embeds[i] instanceof WordExtractor) numWord++; + else if(embeds[i] instanceof OutlookTextExtactor) numMsg++; } assertEquals(0, numPpt); assertEquals(0, numXls); assertEquals(1, numWord); + assertEquals(0, numMsg); + + // Outlook with another outlook file in it + ext = (OutlookTextExtactor) + ExtractorFactory.createExtractor(msgEmbMsg); + embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext); + + numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; + assertEquals(1, embeds.length); + for(int i=0; i<embeds.length; i++) { + assertTrue(embeds[i].getText().length() > 20); + if(embeds[i] instanceof PowerPointExtractor) numPpt++; + else if(embeds[i] instanceof ExcelExtractor) numXls++; + else if(embeds[i] instanceof WordExtractor) numWord++; + else if(embeds[i] instanceof OutlookTextExtactor) numMsg++; + } + assertEquals(0, numPpt); + assertEquals(0, numXls); + assertEquals(0, numWord); + assertEquals(1, numMsg); + // TODO - PowerPoint // TODO - Publisher