Support nested Outlook files in ExtractorFactory

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982334 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-08-04 17:22:15 +00:00
parent 514e6be1fe
commit 8dcf35452d
2 changed files with 35 additions and 6 deletions

View File

@ -50,7 +50,6 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
@ -289,8 +288,10 @@ public class ExtractorFactory {
MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
if(attachment.attachData != null) {
byte[] data = attachment.attachData.getValue();
nonPOIFS.add( new ByteArrayInputStream(data) );
byte[] data = attachment.attachData.getValue();
nonPOIFS.add( new ByteArrayInputStream(data) );
} else if(attachment.attachmentDirectory != null) {
dirs.add(attachment.attachmentDirectory.getDirectory());
}
}
}

View File

@ -66,6 +66,7 @@ public class TestExtractorFactory extends TestCase {
private File msg;
private File msgEmb;
private File msgEmbMsg;
private File vsd;
@ -102,6 +103,7 @@ public class TestExtractorFactory extends TestCase {
POIDataSamples olTests = POIDataSamples.getHSMFInstance();
msg = olTests.getFile("quick.msg");
msgEmb = olTests.getFile("attachment_test_msg.msg");
msgEmbMsg = olTests.getFile("attachment_msg_pdf.msg");
}
public void testFile() throws Exception {
@ -534,51 +536,77 @@ public class TestExtractorFactory extends TestCase {
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
assertEquals(6, embeds.length);
int numWord = 0, numXls = 0, numPpt = 0;
int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0;
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(2, numPpt);
assertEquals(2, numXls);
assertEquals(2, numWord);
assertEquals(0, numMsg);
// Word
ext = (POIOLE2TextExtractor)
ExtractorFactory.createExtractor(docEmb);
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
numWord = 0; numXls = 0; numPpt = 0;
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
assertEquals(4, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(1, numPpt);
assertEquals(2, numXls);
assertEquals(1, numWord);
assertEquals(0, numMsg);
// Outlook
ext = (OutlookTextExtactor)
ExtractorFactory.createExtractor(msgEmb);
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
numWord = 0; numXls = 0; numPpt = 0;
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
assertEquals(1, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(0, numPpt);
assertEquals(0, numXls);
assertEquals(1, numWord);
assertEquals(0, numMsg);
// Outlook message with another Outlook message attached to it
ext = (OutlookTextExtactor)
ExtractorFactory.createExtractor(msgEmbMsg);
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
assertEquals(1, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(0, numPpt);
assertEquals(0, numXls);
assertEquals(0, numWord);
assertEquals(1, numMsg);
// TODO - PowerPoint
// TODO - Publisher