Support nested Outlook files in ExtractorFactory
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@982334 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
514e6be1fe
commit
8dcf35452d
@ -50,7 +50,6 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.xslf.XSLFSlideShow;
|
||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
||||
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
|
||||
@ -289,8 +288,10 @@ public class ExtractorFactory {
|
||||
MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
|
||||
for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
|
||||
if(attachment.attachData != null) {
|
||||
byte[] data = attachment.attachData.getValue();
|
||||
nonPOIFS.add( new ByteArrayInputStream(data) );
|
||||
byte[] data = attachment.attachData.getValue();
|
||||
nonPOIFS.add( new ByteArrayInputStream(data) );
|
||||
} else if(attachment.attachmentDirectory != null) {
|
||||
dirs.add(attachment.attachmentDirectory.getDirectory());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -66,6 +66,7 @@ public class TestExtractorFactory extends TestCase {
|
||||
|
||||
private File msg;
|
||||
private File msgEmb;
|
||||
private File msgEmbMsg;
|
||||
|
||||
private File vsd;
|
||||
|
||||
@ -102,6 +103,7 @@ public class TestExtractorFactory extends TestCase {
|
||||
POIDataSamples olTests = POIDataSamples.getHSMFInstance();
|
||||
msg = olTests.getFile("quick.msg");
|
||||
msgEmb = olTests.getFile("attachment_test_msg.msg");
|
||||
msgEmbMsg = olTests.getFile("attachment_msg_pdf.msg");
|
||||
}
|
||||
|
||||
public void testFile() throws Exception {
|
||||
@ -534,51 +536,77 @@ public class TestExtractorFactory extends TestCase {
|
||||
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
|
||||
|
||||
assertEquals(6, embeds.length);
|
||||
int numWord = 0, numXls = 0, numPpt = 0;
|
||||
int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0;
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
}
|
||||
assertEquals(2, numPpt);
|
||||
assertEquals(2, numXls);
|
||||
assertEquals(2, numWord);
|
||||
assertEquals(0, numMsg);
|
||||
|
||||
// Word
|
||||
ext = (POIOLE2TextExtractor)
|
||||
ExtractorFactory.createExtractor(docEmb);
|
||||
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
|
||||
|
||||
numWord = 0; numXls = 0; numPpt = 0;
|
||||
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
|
||||
assertEquals(4, embeds.length);
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
}
|
||||
assertEquals(1, numPpt);
|
||||
assertEquals(2, numXls);
|
||||
assertEquals(1, numWord);
|
||||
assertEquals(0, numMsg);
|
||||
|
||||
// Outlook
|
||||
ext = (OutlookTextExtactor)
|
||||
ExtractorFactory.createExtractor(msgEmb);
|
||||
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
|
||||
|
||||
numWord = 0; numXls = 0; numPpt = 0;
|
||||
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
|
||||
assertEquals(1, embeds.length);
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
}
|
||||
assertEquals(0, numPpt);
|
||||
assertEquals(0, numXls);
|
||||
assertEquals(1, numWord);
|
||||
assertEquals(0, numMsg);
|
||||
|
||||
// Outlook message with another Outlook message embedded in it
|
||||
ext = (OutlookTextExtactor)
|
||||
ExtractorFactory.createExtractor(msgEmbMsg);
|
||||
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
|
||||
|
||||
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
|
||||
assertEquals(1, embeds.length);
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
}
|
||||
assertEquals(0, numPpt);
|
||||
assertEquals(0, numXls);
|
||||
assertEquals(0, numWord);
|
||||
assertEquals(1, numMsg);
|
||||
|
||||
|
||||
// TODO - PowerPoint
|
||||
// TODO - Publisher
|
||||
|
Loading…
Reference in New Issue
Block a user