Wire up the new HSMFTextExtactor to the ExtractorFactory
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@897246 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bd2f63c721
commit
cefe4e1d28
@ -31,6 +31,7 @@ import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
||||
import org.apache.poi.hsmf.extractor.HSMFTextExtactor;
|
||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
@ -138,6 +139,11 @@ public class ExtractorFactory {
|
||||
if(entry.getName().equals("VisioDocument")) {
|
||||
return new VisioTextExtractor(poifsDir, fs);
|
||||
}
|
||||
if(entry.getName().equals("__substg1.0_1000001E") ||
|
||||
entry.getName().equals("__substg1.0_0047001E") ||
|
||||
entry.getName().equals("__substg1.0_0037001E")) {
|
||||
return new HSMFTextExtactor(poifsDir, fs);
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
||||
import org.apache.poi.hsmf.extractor.HSMFTextExtactor;
|
||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
@ -57,6 +58,7 @@ public class TestExtractorFactory extends TestCase {
|
||||
private File ppt;
|
||||
private File pptx;
|
||||
|
||||
private File msg;
|
||||
private File vsd;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
@ -81,6 +83,9 @@ public class TestExtractorFactory extends TestCase {
|
||||
|
||||
POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
|
||||
vsd = dgTests.getFile("Test_Visio-Some_Random_Text.vsd");
|
||||
|
||||
POIDataSamples olTests = POIDataSamples.getHSMFInstance();
|
||||
msg = olTests.getFile("quick.msg");
|
||||
}
|
||||
|
||||
public void testFile() throws Exception {
|
||||
@ -161,6 +166,15 @@ public class TestExtractorFactory extends TestCase {
|
||||
ExtractorFactory.createExtractor(vsd).getText().length() > 50
|
||||
);
|
||||
|
||||
// Outlook msg
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(msg)
|
||||
instanceof HSMFTextExtactor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(msg).getText().length() > 50
|
||||
);
|
||||
|
||||
// Text
|
||||
try {
|
||||
ExtractorFactory.createExtractor(txt);
|
||||
@ -231,6 +245,15 @@ public class TestExtractorFactory extends TestCase {
|
||||
ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
|
||||
);
|
||||
|
||||
// Outlook msg
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new FileInputStream(msg))
|
||||
instanceof HSMFTextExtactor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new FileInputStream(msg)).getText().length() > 50
|
||||
);
|
||||
|
||||
// Text
|
||||
try {
|
||||
ExtractorFactory.createExtractor(new FileInputStream(txt));
|
||||
@ -277,6 +300,15 @@ public class TestExtractorFactory extends TestCase {
|
||||
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
|
||||
);
|
||||
|
||||
// Outlook msg
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg)))
|
||||
instanceof HSMFTextExtactor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
|
||||
);
|
||||
|
||||
// Text
|
||||
try {
|
||||
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
|
||||
@ -375,5 +407,6 @@ public class TestExtractorFactory extends TestCase {
|
||||
|
||||
// TODO - PowerPoint
|
||||
// TODO - Visio
|
||||
// TODO - Outlook
|
||||
}
|
||||
}
|
||||
|
@ -34,6 +34,7 @@ import org.apache.poi.hsmf.datatypes.RecipientChunks;
|
||||
import org.apache.poi.hsmf.datatypes.StringChunk;
|
||||
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
|
||||
import org.apache.poi.hsmf.parsers.POIFSChunkParser;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
||||
/**
|
||||
@ -78,15 +79,24 @@ public class MAPIMessage extends POIDocument {
|
||||
this(new POIFSFileSystem(in));
|
||||
}
|
||||
/**
|
||||
* Constructor for reading MSG Files from an input stream.
|
||||
* Constructor for reading MSG Files from a POIFS filesystem
|
||||
* @param fs the POIFS filesystem to read the MSG file from
|
||||
* @throws IOException
|
||||
*/
|
||||
public MAPIMessage(POIFSFileSystem fs) throws IOException {
|
||||
super(fs);
|
||||
this(fs.getRoot(), fs);
|
||||
}
|
||||
/**
|
||||
* Constructor for reading MSG Files from a certain
|
||||
* point within a POIFS filesystem
|
||||
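* @param poifsDir the directory within the POIFS filesystem to read the message from
* @param fs the POIFS filesystem that the directory belongs to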
|
||||
* @throws IOException
|
||||
*/
|
||||
public MAPIMessage(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
|
||||
super(poifsDir, fs);
|
||||
|
||||
// Grab all the chunks
|
||||
ChunkGroup[] chunkGroups = POIFSChunkParser.parse(fs);
|
||||
ChunkGroup[] chunkGroups = POIFSChunkParser.parse(poifsDir);
|
||||
|
||||
// Grab interesting bits
|
||||
ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
|
||||
|
@ -23,12 +23,16 @@ import java.text.SimpleDateFormat;
|
||||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hsmf.MAPIMessage;
|
||||
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
||||
public class HSMFTextExtactor extends POIOLE2TextExtractor {
|
||||
public HSMFTextExtactor(MAPIMessage msg) {
|
||||
super(msg);
|
||||
}
|
||||
public HSMFTextExtactor(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
|
||||
this(new MAPIMessage(poifsDir, fs));
|
||||
}
|
||||
public HSMFTextExtactor(POIFSFileSystem fs) throws IOException {
|
||||
this(new MAPIMessage(fs));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user