whitespace
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1753028 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e0c50807c4
commit
1cf76af9e8
@ -125,35 +125,35 @@ public class ExtractorFactory {
|
||||
POIOLE2TextExtractor extractor = createExtractor(fs);
|
||||
extractor.setFilesystem(fs);
|
||||
return extractor;
|
||||
|
||||
} catch (OfficeXmlFileException e) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
|
||||
|
||||
} catch (NotOLE2FileException ne) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
|
||||
|
||||
} catch (OpenXML4JException e) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw e;
|
||||
|
||||
} catch (XmlException e) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw e;
|
||||
|
||||
} catch (IOException e) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw e;
|
||||
|
||||
} catch (RuntimeException e) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
@ -161,14 +161,14 @@ public class ExtractorFactory {
|
||||
public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
||||
// Figure out the kind of stream
|
||||
// If clearly doesn't do mark/reset, wrap up
|
||||
if(! inp.markSupported()) {
|
||||
if (! inp.markSupported()) {
|
||||
inp = new PushbackInputStream(inp, 8);
|
||||
}
|
||||
|
||||
if(NPOIFSFileSystem.hasPOIFSHeader(inp)) {
|
||||
if (NPOIFSFileSystem.hasPOIFSHeader(inp)) {
|
||||
return createExtractor(new NPOIFSFileSystem(inp));
|
||||
}
|
||||
if(DocumentFactoryHelper.hasOOXMLHeader(inp)) {
|
||||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
|
||||
return createExtractor(OPCPackage.open(inp));
|
||||
}
|
||||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
|
||||
@ -187,8 +187,8 @@ public class ExtractorFactory {
|
||||
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
||||
try {
|
||||
// Check for the normal Office core document
|
||||
PackageRelationshipCollection core =
|
||||
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||
PackageRelationshipCollection core;
|
||||
core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||
|
||||
// If nothing was found, try some of the other OOXML-based core types
|
||||
if (core.size() == 0) {
|
||||
@ -211,36 +211,36 @@ public class ExtractorFactory {
|
||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||
|
||||
// Is it XSSF?
|
||||
for(XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
|
||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
||||
if(getPreferEventExtractor()) {
|
||||
for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
|
||||
if (corePart.getContentType().equals(rel.getContentType())) {
|
||||
if (getPreferEventExtractor()) {
|
||||
return new XSSFEventBasedExcelExtractor(pkg);
|
||||
}
|
||||
|
||||
return new XSSFExcelExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
// Is it XWPF?
|
||||
for(XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
|
||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
||||
for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
|
||||
if (corePart.getContentType().equals(rel.getContentType())) {
|
||||
return new XWPFWordExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
// Is it XSLF?
|
||||
for(XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
||||
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
||||
if (corePart.getContentType().equals(rel.getContentType())) {
|
||||
return new XSLFPowerPointExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
// special handling for SlideShow-Theme-files,
|
||||
if(XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) {
|
||||
if (XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) {
|
||||
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
|
||||
|
||||
} catch (IOException e) {
|
||||
// ensure that we close the package again if there is an error opening it, however
|
||||
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
||||
@ -260,7 +260,6 @@ public class ExtractorFactory {
|
||||
// ensure that we close the package again if there is an error opening it, however
|
||||
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
|
||||
pkg.revert();
|
||||
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
@ -275,8 +274,7 @@ public class ExtractorFactory {
|
||||
return OLE2ExtractorFactory.createExtractor(fs);
|
||||
}
|
||||
|
||||
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException,
|
||||
OpenXML4JException, XmlException
|
||||
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
|
||||
{
|
||||
// First, check for OOXML
|
||||
for (String entryName : poifsDir.getEntryNames()) {
|
||||
@ -305,60 +303,57 @@ public class ExtractorFactory {
|
||||
|
||||
// Find all the embedded directories
|
||||
DirectoryEntry root = ext.getRoot();
|
||||
if(root == null) {
|
||||
if (root == null) {
|
||||
throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
|
||||
}
|
||||
|
||||
if(ext instanceof ExcelExtractor) {
|
||||
if (ext instanceof ExcelExtractor) {
|
||||
// These are in MBD... under the root
|
||||
Iterator<Entry> it = root.getEntries();
|
||||
while(it.hasNext()) {
|
||||
while (it.hasNext()) {
|
||||
Entry entry = it.next();
|
||||
if(entry.getName().startsWith("MBD")) {
|
||||
if (entry.getName().startsWith("MBD")) {
|
||||
dirs.add(entry);
|
||||
}
|
||||
}
|
||||
} else if(ext instanceof WordExtractor) {
|
||||
} else if (ext instanceof WordExtractor) {
|
||||
// These are in ObjectPool -> _... under the root
|
||||
try {
|
||||
DirectoryEntry op = (DirectoryEntry)
|
||||
root.getEntry("ObjectPool");
|
||||
DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
|
||||
Iterator<Entry> it = op.getEntries();
|
||||
while(it.hasNext()) {
|
||||
while (it.hasNext()) {
|
||||
Entry entry = it.next();
|
||||
if(entry.getName().startsWith("_")) {
|
||||
if (entry.getName().startsWith("_")) {
|
||||
dirs.add(entry);
|
||||
}
|
||||
}
|
||||
} catch(FileNotFoundException e) {
|
||||
} catch (FileNotFoundException e) {
|
||||
// ignored here
|
||||
}
|
||||
//} else if(ext instanceof PowerPointExtractor) {
|
||||
// Tricky, not stored directly in poifs
|
||||
// TODO
|
||||
} else if(ext instanceof OutlookTextExtactor) {
|
||||
} else if (ext instanceof OutlookTextExtactor) {
|
||||
// Stored in the Attachment blocks
|
||||
MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
|
||||
for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
|
||||
if(attachment.attachData != null) {
|
||||
for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
|
||||
if (attachment.attachData != null) {
|
||||
byte[] data = attachment.attachData.getValue();
|
||||
nonPOIFS.add( new ByteArrayInputStream(data) );
|
||||
} else if(attachment.attachmentDirectory != null) {
|
||||
} else if (attachment.attachmentDirectory != null) {
|
||||
dirs.add(attachment.attachmentDirectory.getDirectory());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create the extractors
|
||||
if(dirs.size() == 0 && nonPOIFS.size() == 0){
|
||||
if (dirs.size() == 0 && nonPOIFS.size() == 0){
|
||||
return new POITextExtractor[0];
|
||||
}
|
||||
|
||||
ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
|
||||
for (Entry dir : dirs) {
|
||||
e.add(createExtractor(
|
||||
(DirectoryNode) dir
|
||||
));
|
||||
e.add(createExtractor((DirectoryNode) dir));
|
||||
}
|
||||
for (InputStream nonPOIF : nonPOIFS) {
|
||||
try {
|
||||
@ -382,7 +377,8 @@ public class ExtractorFactory {
|
||||
* empty array. Otherwise, you'll get one open
|
||||
* {@link POITextExtractor} for each embedded file.
|
||||
*/
|
||||
public static POITextExtractor[] getEmbededDocsTextExtractors(@SuppressWarnings("UnusedParameters") POIXMLTextExtractor ext) {
|
||||
@SuppressWarnings("UnusedParameters")
|
||||
public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
|
||||
throw new IllegalStateException("Not yet supported");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user