whitespace
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1753028 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e0c50807c4
commit
1cf76af9e8
@ -72,180 +72,180 @@ import org.apache.xmlbeans.XmlException;
|
|||||||
*/
|
*/
|
||||||
@SuppressWarnings("WeakerAccess")
|
@SuppressWarnings("WeakerAccess")
|
||||||
public class ExtractorFactory {
|
public class ExtractorFactory {
|
||||||
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
|
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
|
||||||
protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
|
protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
|
||||||
protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
|
protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should this thread prefer event based over usermodel based extractors?
|
* Should this thread prefer event based over usermodel based extractors?
|
||||||
* (usermodel extractors tend to be more accurate, but use more memory)
|
* (usermodel extractors tend to be more accurate, but use more memory)
|
||||||
* Default is false.
|
* Default is false.
|
||||||
*/
|
*/
|
||||||
public static boolean getThreadPrefersEventExtractors() {
|
public static boolean getThreadPrefersEventExtractors() {
|
||||||
return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
|
return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should all threads prefer event based over usermodel based extractors?
|
* Should all threads prefer event based over usermodel based extractors?
|
||||||
* (usermodel extractors tend to be more accurate, but use more memory)
|
* (usermodel extractors tend to be more accurate, but use more memory)
|
||||||
* Default is to use the thread level setting, which defaults to false.
|
* Default is to use the thread level setting, which defaults to false.
|
||||||
*/
|
*/
|
||||||
public static Boolean getAllThreadsPreferEventExtractors() {
|
public static Boolean getAllThreadsPreferEventExtractors() {
|
||||||
return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
|
return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should this thread prefer event based over usermodel based extractors?
|
* Should this thread prefer event based over usermodel based extractors?
|
||||||
* Will only be used if the All Threads setting is null.
|
* Will only be used if the All Threads setting is null.
|
||||||
*/
|
*/
|
||||||
public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
|
public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
|
||||||
OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
|
OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should all threads prefer event based over usermodel based extractors?
|
* Should all threads prefer event based over usermodel based extractors?
|
||||||
* If set, will take preference over the Thread level setting.
|
* If set, will take preference over the Thread level setting.
|
||||||
*/
|
*/
|
||||||
public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
|
public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
|
||||||
OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
|
OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should this thread use event based extractors if available?
|
* Should this thread use event based extractors if available?
|
||||||
* Checks the all-threads one first, then thread specific.
|
* Checks the all-threads one first, then thread specific.
|
||||||
*/
|
*/
|
||||||
protected static boolean getPreferEventExtractor() {
|
protected static boolean getPreferEventExtractor() {
|
||||||
return OLE2ExtractorFactory.getPreferEventExtractor();
|
return OLE2ExtractorFactory.getPreferEventExtractor();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
||||||
NPOIFSFileSystem fs = null;
|
NPOIFSFileSystem fs = null;
|
||||||
try {
|
try {
|
||||||
fs = new NPOIFSFileSystem(f);
|
fs = new NPOIFSFileSystem(f);
|
||||||
POIOLE2TextExtractor extractor = createExtractor(fs);
|
POIOLE2TextExtractor extractor = createExtractor(fs);
|
||||||
extractor.setFilesystem(fs);
|
extractor.setFilesystem(fs);
|
||||||
return extractor;
|
return extractor;
|
||||||
|
|
||||||
} catch (OfficeXmlFileException e) {
|
} catch (OfficeXmlFileException e) {
|
||||||
// ensure file-handle release
|
// ensure file-handle release
|
||||||
IOUtils.closeQuietly(fs);
|
IOUtils.closeQuietly(fs);
|
||||||
|
|
||||||
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
|
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
|
||||||
|
|
||||||
} catch (NotOLE2FileException ne) {
|
} catch (NotOLE2FileException ne) {
|
||||||
// ensure file-handle release
|
// ensure file-handle release
|
||||||
IOUtils.closeQuietly(fs);
|
IOUtils.closeQuietly(fs);
|
||||||
|
|
||||||
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
|
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
|
||||||
} catch (OpenXML4JException e) {
|
|
||||||
// ensure file-handle release
|
|
||||||
IOUtils.closeQuietly(fs);
|
|
||||||
|
|
||||||
throw e;
|
} catch (OpenXML4JException e) {
|
||||||
} catch (XmlException e) {
|
// ensure file-handle release
|
||||||
// ensure file-handle release
|
IOUtils.closeQuietly(fs);
|
||||||
IOUtils.closeQuietly(fs);
|
throw e;
|
||||||
|
|
||||||
throw e;
|
} catch (XmlException e) {
|
||||||
} catch (IOException e) {
|
// ensure file-handle release
|
||||||
// ensure file-handle release
|
IOUtils.closeQuietly(fs);
|
||||||
IOUtils.closeQuietly(fs);
|
throw e;
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
// ensure file-handle release
|
||||||
|
IOUtils.closeQuietly(fs);
|
||||||
|
throw e;
|
||||||
|
|
||||||
throw e;
|
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
// ensure file-handle release
|
// ensure file-handle release
|
||||||
IOUtils.closeQuietly(fs);
|
IOUtils.closeQuietly(fs);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
throw e;
|
public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
||||||
}
|
// Figure out the kind of stream
|
||||||
|
// If clearly doesn't do mark/reset, wrap up
|
||||||
|
if (! inp.markSupported()) {
|
||||||
|
inp = new PushbackInputStream(inp, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NPOIFSFileSystem.hasPOIFSHeader(inp)) {
|
||||||
|
return createExtractor(new NPOIFSFileSystem(inp));
|
||||||
|
}
|
||||||
|
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
|
||||||
|
return createExtractor(OPCPackage.open(inp));
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
/**
|
||||||
// Figure out the kind of stream
|
* Tries to determine the actual type of file and produces a matching text-extractor for it.
|
||||||
// If clearly doesn't do mark/reset, wrap up
|
*
|
||||||
if(! inp.markSupported()) {
|
* @param pkg An {@link OPCPackage}.
|
||||||
inp = new PushbackInputStream(inp, 8);
|
* @return A {@link POIXMLTextExtractor} for the given file.
|
||||||
}
|
* @throws IOException If an error occurs while reading the file
|
||||||
|
* @throws OpenXML4JException If an error parsing the OpenXML file format is found.
|
||||||
if(NPOIFSFileSystem.hasPOIFSHeader(inp)) {
|
* @throws XmlException If an XML parsing error occurs.
|
||||||
return createExtractor(new NPOIFSFileSystem(inp));
|
* @throws IllegalArgumentException If no matching file type could be found.
|
||||||
}
|
*/
|
||||||
if(DocumentFactoryHelper.hasOOXMLHeader(inp)) {
|
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
||||||
return createExtractor(OPCPackage.open(inp));
|
|
||||||
}
|
|
||||||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tries to determine the actual type of file and produces a matching text-extractor for it.
|
|
||||||
*
|
|
||||||
* @param pkg An {@link OPCPackage}.
|
|
||||||
* @return A {@link POIXMLTextExtractor} for the given file.
|
|
||||||
* @throws IOException If an error occurs while reading the file
|
|
||||||
* @throws OpenXML4JException If an error parsing the OpenXML file format is found.
|
|
||||||
* @throws XmlException If an XML parsing error occurs.
|
|
||||||
* @throws IllegalArgumentException If no matching file type could be found.
|
|
||||||
*/
|
|
||||||
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
|
||||||
try {
|
try {
|
||||||
// Check for the normal Office core document
|
// Check for the normal Office core document
|
||||||
PackageRelationshipCollection core =
|
PackageRelationshipCollection core;
|
||||||
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||||
|
|
||||||
// If nothing was found, try some of the other OOXML-based core types
|
// If nothing was found, try some of the other OOXML-based core types
|
||||||
if (core.size() == 0) {
|
if (core.size() == 0) {
|
||||||
// Could it be an OOXML-Strict one?
|
// Could it be an OOXML-Strict one?
|
||||||
core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
|
core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
|
||||||
}
|
}
|
||||||
if (core.size() == 0) {
|
if (core.size() == 0) {
|
||||||
// Could it be a visio one?
|
// Could it be a visio one?
|
||||||
core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
|
core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
|
||||||
if (core.size() == 1)
|
if (core.size() == 1)
|
||||||
return new XDGFVisioExtractor(pkg);
|
return new XDGFVisioExtractor(pkg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Should just be a single core document, complain if not
|
// Should just be a single core document, complain if not
|
||||||
if (core.size() != 1) {
|
if (core.size() != 1) {
|
||||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grab the core document part, and try to identify from that
|
// Grab the core document part, and try to identify from that
|
||||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||||
|
|
||||||
// Is it XSSF?
|
// Is it XSSF?
|
||||||
for(XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
|
for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
|
||||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
if (corePart.getContentType().equals(rel.getContentType())) {
|
||||||
if(getPreferEventExtractor()) {
|
if (getPreferEventExtractor()) {
|
||||||
return new XSSFEventBasedExcelExtractor(pkg);
|
return new XSSFEventBasedExcelExtractor(pkg);
|
||||||
}
|
}
|
||||||
|
return new XSSFExcelExtractor(pkg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new XSSFExcelExtractor(pkg);
|
// Is it XWPF?
|
||||||
}
|
for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
|
||||||
}
|
if (corePart.getContentType().equals(rel.getContentType())) {
|
||||||
|
return new XWPFWordExtractor(pkg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Is it XWPF?
|
// Is it XSLF?
|
||||||
for(XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
|
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
||||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
if (corePart.getContentType().equals(rel.getContentType())) {
|
||||||
return new XWPFWordExtractor(pkg);
|
return new XSLFPowerPointExtractor(pkg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Is it XSLF?
|
// special handling for SlideShow-Theme-files,
|
||||||
for(XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
if (XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) {
|
||||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
|
||||||
return new XSLFPowerPointExtractor(pkg);
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// special handling for SlideShow-Theme-files,
|
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
|
||||||
if(XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) {
|
|
||||||
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
|
} catch (IOException e) {
|
||||||
} catch (IOException e) {
|
// ensure that we close the package again if there is an error opening it, however
|
||||||
// ensure that we close the package again if there is an error opening it, however
|
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
||||||
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
pkg.revert();
|
||||||
pkg.revert();
|
throw e;
|
||||||
throw e;
|
|
||||||
} catch (OpenXML4JException e) {
|
} catch (OpenXML4JException e) {
|
||||||
// ensure that we close the package again if there is an error opening it, however
|
// ensure that we close the package again if there is an error opening it, however
|
||||||
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
||||||
@ -256,27 +256,25 @@ public class ExtractorFactory {
|
|||||||
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
||||||
pkg.revert();
|
pkg.revert();
|
||||||
throw e;
|
throw e;
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
// ensure that we close the package again if there is an error opening it, however
|
// ensure that we close the package again if there is an error opening it, however
|
||||||
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
||||||
pkg.revert();
|
pkg.revert();
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
throw e;
|
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||||
}
|
return OLE2ExtractorFactory.createExtractor(fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
|
||||||
return OLE2ExtractorFactory.createExtractor(fs);
|
|
||||||
}
|
|
||||||
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||||
return OLE2ExtractorFactory.createExtractor(fs);
|
return OLE2ExtractorFactory.createExtractor(fs);
|
||||||
}
|
}
|
||||||
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||||
return OLE2ExtractorFactory.createExtractor(fs);
|
return OLE2ExtractorFactory.createExtractor(fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException,
|
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
|
||||||
OpenXML4JException, XmlException
|
|
||||||
{
|
{
|
||||||
// First, check for OOXML
|
// First, check for OOXML
|
||||||
for (String entryName : poifsDir.getEntryNames()) {
|
for (String entryName : poifsDir.getEntryNames()) {
|
||||||
@ -290,99 +288,97 @@ public class ExtractorFactory {
|
|||||||
return OLE2ExtractorFactory.createExtractor(poifsDir);
|
return OLE2ExtractorFactory.createExtractor(poifsDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an array of text extractors, one for each of
|
* Returns an array of text extractors, one for each of
|
||||||
* the embedded documents in the file (if there are any).
|
* the embedded documents in the file (if there are any).
|
||||||
* If there are no embedded documents, you'll get back an
|
* If there are no embedded documents, you'll get back an
|
||||||
* empty array. Otherwise, you'll get one open
|
* empty array. Otherwise, you'll get one open
|
||||||
* {@link POITextExtractor} for each embedded file.
|
* {@link POITextExtractor} for each embedded file.
|
||||||
*/
|
*/
|
||||||
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
|
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
|
||||||
// All the embedded directories we spotted
|
// All the embedded directories we spotted
|
||||||
ArrayList<Entry> dirs = new ArrayList<Entry>();
|
ArrayList<Entry> dirs = new ArrayList<Entry>();
|
||||||
// For anything else not directly held in as a POIFS directory
|
// For anything else not directly held in as a POIFS directory
|
||||||
ArrayList<InputStream> nonPOIFS = new ArrayList<InputStream>();
|
ArrayList<InputStream> nonPOIFS = new ArrayList<InputStream>();
|
||||||
|
|
||||||
// Find all the embedded directories
|
// Find all the embedded directories
|
||||||
DirectoryEntry root = ext.getRoot();
|
DirectoryEntry root = ext.getRoot();
|
||||||
if(root == null) {
|
if (root == null) {
|
||||||
throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
|
throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(ext instanceof ExcelExtractor) {
|
if (ext instanceof ExcelExtractor) {
|
||||||
// These are in MBD... under the root
|
// These are in MBD... under the root
|
||||||
Iterator<Entry> it = root.getEntries();
|
Iterator<Entry> it = root.getEntries();
|
||||||
while(it.hasNext()) {
|
while (it.hasNext()) {
|
||||||
Entry entry = it.next();
|
Entry entry = it.next();
|
||||||
if(entry.getName().startsWith("MBD")) {
|
if (entry.getName().startsWith("MBD")) {
|
||||||
dirs.add(entry);
|
dirs.add(entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(ext instanceof WordExtractor) {
|
} else if (ext instanceof WordExtractor) {
|
||||||
// These are in ObjectPool -> _... under the root
|
// These are in ObjectPool -> _... under the root
|
||||||
try {
|
try {
|
||||||
DirectoryEntry op = (DirectoryEntry)
|
DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
|
||||||
root.getEntry("ObjectPool");
|
Iterator<Entry> it = op.getEntries();
|
||||||
Iterator<Entry> it = op.getEntries();
|
while (it.hasNext()) {
|
||||||
while(it.hasNext()) {
|
Entry entry = it.next();
|
||||||
Entry entry = it.next();
|
if (entry.getName().startsWith("_")) {
|
||||||
if(entry.getName().startsWith("_")) {
|
dirs.add(entry);
|
||||||
dirs.add(entry);
|
}
|
||||||
}
|
}
|
||||||
}
|
} catch (FileNotFoundException e) {
|
||||||
} catch(FileNotFoundException e) {
|
|
||||||
// ignored here
|
// ignored here
|
||||||
}
|
}
|
||||||
//} else if(ext instanceof PowerPointExtractor) {
|
//} else if(ext instanceof PowerPointExtractor) {
|
||||||
// Tricky, not stored directly in poifs
|
// Tricky, not stored directly in poifs
|
||||||
// TODO
|
// TODO
|
||||||
} else if(ext instanceof OutlookTextExtactor) {
|
} else if (ext instanceof OutlookTextExtactor) {
|
||||||
// Stored in the Attachment blocks
|
// Stored in the Attachment blocks
|
||||||
MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
|
MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
|
||||||
for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
|
for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
|
||||||
if(attachment.attachData != null) {
|
if (attachment.attachData != null) {
|
||||||
byte[] data = attachment.attachData.getValue();
|
byte[] data = attachment.attachData.getValue();
|
||||||
nonPOIFS.add( new ByteArrayInputStream(data) );
|
nonPOIFS.add( new ByteArrayInputStream(data) );
|
||||||
} else if(attachment.attachmentDirectory != null) {
|
} else if (attachment.attachmentDirectory != null) {
|
||||||
dirs.add(attachment.attachmentDirectory.getDirectory());
|
dirs.add(attachment.attachmentDirectory.getDirectory());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the extractors
|
// Create the extractors
|
||||||
if(dirs.size() == 0 && nonPOIFS.size() == 0){
|
if (dirs.size() == 0 && nonPOIFS.size() == 0){
|
||||||
return new POITextExtractor[0];
|
return new POITextExtractor[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
|
ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
|
||||||
for (Entry dir : dirs) {
|
for (Entry dir : dirs) {
|
||||||
e.add(createExtractor(
|
e.add(createExtractor((DirectoryNode) dir));
|
||||||
(DirectoryNode) dir
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
for (InputStream nonPOIF : nonPOIFS) {
|
for (InputStream nonPOIF : nonPOIFS) {
|
||||||
try {
|
try {
|
||||||
e.add(createExtractor(nonPOIF));
|
e.add(createExtractor(nonPOIF));
|
||||||
} catch (IllegalArgumentException ie) {
|
} catch (IllegalArgumentException ie) {
|
||||||
// Ignore, just means it didn't contain
|
// Ignore, just means it didn't contain
|
||||||
// a format we support as yet
|
// a format we support as yet
|
||||||
} catch (XmlException xe) {
|
} catch (XmlException xe) {
|
||||||
throw new IOException(xe.getMessage());
|
throw new IOException(xe.getMessage());
|
||||||
} catch (OpenXML4JException oe) {
|
} catch (OpenXML4JException oe) {
|
||||||
throw new IOException(oe.getMessage());
|
throw new IOException(oe.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return e.toArray(new POITextExtractor[e.size()]);
|
return e.toArray(new POITextExtractor[e.size()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an array of text extractors, one for each of
|
* Returns an array of text extractors, one for each of
|
||||||
* the embedded documents in the file (if there are any).
|
* the embedded documents in the file (if there are any).
|
||||||
* If there are no embedded documents, you'll get back an
|
* If there are no embedded documents, you'll get back an
|
||||||
* empty array. Otherwise, you'll get one open
|
* empty array. Otherwise, you'll get one open
|
||||||
* {@link POITextExtractor} for each embedded file.
|
* {@link POITextExtractor} for each embedded file.
|
||||||
*/
|
*/
|
||||||
public static POITextExtractor[] getEmbededDocsTextExtractors(@SuppressWarnings("UnusedParameters") POIXMLTextExtractor ext) {
|
@SuppressWarnings("UnusedParameters")
|
||||||
throw new IllegalStateException("Not yet supported");
|
public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
|
||||||
}
|
throw new IllegalStateException("Not yet supported");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user