whitespace

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1753028 13f79535-47bb-0310-9956-ffa450edef68
2016-07-17 08:26:51 +00:00 · 2016-07-17 08:26:51 +00:00 · 1cf76af9e8
commit 1cf76af9e8
parent e0c50807c4
1 changed files with 238 additions and 242 deletions
--- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
+++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
@@ -72,180 +72,180 @@ import org.apache.xmlbeans.XmlException;
 */
@SuppressWarnings("WeakerAccess")
 public class ExtractorFactory {
-	public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
-	protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
-	protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
+    public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
+    protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
+    protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;

-   /**
-    * Should this thread prefer event based over usermodel based extractors?
-    * (usermodel extractors tend to be more accurate, but use more memory)
-    * Default is false.
-    */
-	public static boolean getThreadPrefersEventExtractors() {
-	   return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
-	}
+    /**
+     * Should this thread prefer event based over usermodel based extractors?
+     * (usermodel extractors tend to be more accurate, but use more memory)
+     * Default is false.
+     */
+    public static boolean getThreadPrefersEventExtractors() {
+        return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
+    }

-   /**
-    * Should all threads prefer event based over usermodel based extractors?
-    * (usermodel extractors tend to be more accurate, but use more memory)
-    * Default is to use the thread level setting, which defaults to false.
-    */
-	public static Boolean getAllThreadsPreferEventExtractors() {
-	   return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
-	}
+    /**
+     * Should all threads prefer event based over usermodel based extractors?
+     * (usermodel extractors tend to be more accurate, but use more memory)
+     * Default is to use the thread level setting, which defaults to false.
+     */
+    public static Boolean getAllThreadsPreferEventExtractors() {
+        return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
+    }

-   /**
-    * Should this thread prefer event based over usermodel based extractors?
-    * Will only be used if the All Threads setting is null.
-    */
-   public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
-       OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
-   }
+    /**
+     * Should this thread prefer event based over usermodel based extractors?
+     * Will only be used if the All Threads setting is null.
+     */
+    public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
+         OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
+    }

-   /**
-    * Should all threads prefer event based over usermodel based extractors?
-    * If set, will take preference over the Thread level setting.
-    */
-   public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
-       OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
-   }
+    /**
+     * Should all threads prefer event based over usermodel based extractors?
+     * If set, will take preference over the Thread level setting.
+     */
+    public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
+         OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
+    }

-   /**
-    * Should this thread use event based extractors is available?
-    * Checks the all-threads one first, then thread specific.
-    */
-   protected static boolean getPreferEventExtractor() {
-       return OLE2ExtractorFactory.getPreferEventExtractor();
-   }
+    /**
+     * Should this thread use event based extractors is available?
+     * Checks the all-threads one first, then thread specific.
+     */
+    protected static boolean getPreferEventExtractor() {
+         return OLE2ExtractorFactory.getPreferEventExtractor();
+    }

-	public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
-	    NPOIFSFileSystem fs = null;
+    public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+        NPOIFSFileSystem fs = null;
        try {
            fs = new NPOIFSFileSystem(f);
            POIOLE2TextExtractor extractor = createExtractor(fs);
            extractor.setFilesystem(fs);
            return extractor;
+
        } catch (OfficeXmlFileException e) {
            // ensure file-handle release
-			IOUtils.closeQuietly(fs);
-
+            IOUtils.closeQuietly(fs);
            return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
+
        } catch (NotOLE2FileException ne) {
            // ensure file-handle release
-			IOUtils.closeQuietly(fs);
-
+            IOUtils.closeQuietly(fs);
            throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
-		} catch (OpenXML4JException e) {
-			// ensure file-handle release
-			IOUtils.closeQuietly(fs);

-			throw e;
-		} catch (XmlException e) {
-			// ensure file-handle release
-			IOUtils.closeQuietly(fs);
+        } catch (OpenXML4JException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;

-			throw e;
-		} catch (IOException e) {
-			// ensure file-handle release
-			IOUtils.closeQuietly(fs);
+        } catch (XmlException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;
+
+        } catch (IOException e) {
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;

-			throw e;
        } catch (RuntimeException e) {
-			// ensure file-handle release
-			IOUtils.closeQuietly(fs);
+            // ensure file-handle release
+            IOUtils.closeQuietly(fs);
+            throw e;
+        }
+     }

-			throw e;
-		}
+    public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+        // Figure out the kind of stream
+        // If clearly doesn't do mark/reset, wrap up
+        if (! inp.markSupported()) {
+            inp = new PushbackInputStream(inp, 8);
+        }
+
+        if (NPOIFSFileSystem.hasPOIFSHeader(inp)) {
+            return createExtractor(new NPOIFSFileSystem(inp));
+        }
+        if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
+            return createExtractor(OPCPackage.open(inp));
+        }
+        throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
    }

-	public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
-		// Figure out the kind of stream
-		// If clearly doesn't do mark/reset, wrap up
-		if(! inp.markSupported()) {
-			inp = new PushbackInputStream(inp, 8);
-		}
-
-		if(NPOIFSFileSystem.hasPOIFSHeader(inp)) {
-			return createExtractor(new NPOIFSFileSystem(inp));
-		}
-		if(DocumentFactoryHelper.hasOOXMLHeader(inp)) {
-			return createExtractor(OPCPackage.open(inp));
-		}
-		throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
-	}
-
-	/**
-	 * Tries to determine the actual type of file and produces a matching text-extractor for it.
-	 *
-	 * @param pkg An {@link OPCPackage}.
-	 * @return A {@link POIXMLTextExtractor} for the given file.
-	 * @throws IOException If an error occurs while reading the file 
-	 * @throws OpenXML4JException If an error parsing the OpenXML file format is found. 
-	 * @throws XmlException If an XML parsing error occurs.
-	 * @throws IllegalArgumentException If no matching file type could be found.
-	 */
-	public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
+    /**
+     * Tries to determine the actual type of file and produces a matching text-extractor for it.
+     *
+     * @param pkg An {@link OPCPackage}.
+     * @return A {@link POIXMLTextExtractor} for the given file.
+     * @throws IOException If an error occurs while reading the file 
+     * @throws OpenXML4JException If an error parsing the OpenXML file format is found. 
+     * @throws XmlException If an XML parsing error occurs.
+     * @throws IllegalArgumentException If no matching file type could be found.
+     */
+    public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
        try {
-    	   // Check for the normal Office core document
-           PackageRelationshipCollection core =
-                pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
-           
-           // If nothing was found, try some of the other OOXML-based core types
-           if (core.size() == 0) {
-               // Could it be an OOXML-Strict one?
-               core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
-           }
-           if (core.size() == 0) {
-               // Could it be a visio one?
-               core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
-               if (core.size() == 1)
-                   return new XDGFVisioExtractor(pkg);
-           }
-           
-           // Should just be a single core document, complain if not
-           if (core.size() != 1) {
-               throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
-           }
-    
-           // Grab the core document part, and try to identify from that
-           PackagePart corePart = pkg.getPart(core.getRelationship(0));
-    
-           // Is it XSSF?
-           for(XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
-              if(corePart.getContentType().equals(rel.getContentType())) {
-                 if(getPreferEventExtractor()) {
-                    return new XSSFEventBasedExcelExtractor(pkg);
-                 }
-    
-                 return new XSSFExcelExtractor(pkg);
-              }
-           }
-    
-           // Is it XWPF?
-           for(XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
-              if(corePart.getContentType().equals(rel.getContentType())) {
-                 return new XWPFWordExtractor(pkg);
-              }
-           }
-    
-           // Is it XSLF?
-           for(XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
-              if(corePart.getContentType().equals(rel.getContentType())) {
-                 return new XSLFPowerPointExtractor(pkg);
-              }
-           }
-    
-           // special handling for SlideShow-Theme-files, 
-           if(XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) {
-               return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
-           }
-           
-           throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
-	    } catch (IOException e) {
-	        // ensure that we close the package again if there is an error opening it, however
-	        // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
-	        pkg.revert();
-	        throw e;
+            // Check for the normal Office core document
+            PackageRelationshipCollection core;
+            core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
+              
+            // If nothing was found, try some of the other OOXML-based core types
+            if (core.size() == 0) {
+                // Could it be an OOXML-Strict one?
+                core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
+            }
+            if (core.size() == 0) {
+                // Could it be a visio one?
+                core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
+                if (core.size() == 1)
+                    return new XDGFVisioExtractor(pkg);
+            }
+              
+            // Should just be a single core document, complain if not
+            if (core.size() != 1) {
+                throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
+            }
+     
+            // Grab the core document part, and try to identify from that
+            PackagePart corePart = pkg.getPart(core.getRelationship(0));
+
+            // Is it XSSF?
+            for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
+                if (corePart.getContentType().equals(rel.getContentType())) {
+                    if (getPreferEventExtractor()) {
+                        return new XSSFEventBasedExcelExtractor(pkg);
+                    }
+                    return new XSSFExcelExtractor(pkg);
+                }
+            }
+     
+            // Is it XWPF?
+            for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
+                if (corePart.getContentType().equals(rel.getContentType())) {
+                    return new XWPFWordExtractor(pkg);
+                }
+            }
+     
+            // Is it XSLF?
+            for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
+                if (corePart.getContentType().equals(rel.getContentType())) {
+                    return new XSLFPowerPointExtractor(pkg);
+                }
+            }
+     
+            // special handling for SlideShow-Theme-files, 
+            if (XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) {
+                return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
+            }
+
+            throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
+
+        } catch (IOException e) {
+            // ensure that we close the package again if there is an error opening it, however
+            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
+            pkg.revert();
+            throw e;
        } catch (OpenXML4JException e) {
            // ensure that we close the package again if there is an error opening it, however
            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
@@ -256,27 +256,25 @@ public class ExtractorFactory {
            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
            pkg.revert();
            throw e;
-	    } catch (RuntimeException e) {
-           // ensure that we close the package again if there is an error opening it, however
-           // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
-           pkg.revert();
-           
-           throw e;
-	    }
-	}
+        } catch (RuntimeException e) {
+            // ensure that we close the package again if there is an error opening it, however
+            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
+            pkg.revert();
+            throw e;
+        }
+    }

-	public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-	    return OLE2ExtractorFactory.createExtractor(fs);
-	}
+    public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return OLE2ExtractorFactory.createExtractor(fs);
+    }
    public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
        return OLE2ExtractorFactory.createExtractor(fs);
-     }
+    }
    public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
        return OLE2ExtractorFactory.createExtractor(fs);
-     }
+    }

-    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException,
-            OpenXML4JException, XmlException
+    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
    {
        // First, check for OOXML
        for (String entryName : poifsDir.getEntryNames()) {
@@ -285,104 +283,102 @@ public class ExtractorFactory {
                return createExtractor(pkg);
            }
        }
-        
+
        // If not, ask the OLE2 code to check, with Scratchpad if possible
        return OLE2ExtractorFactory.createExtractor(poifsDir);
    }

-	/**
-	 * Returns an array of text extractors, one for each of
-	 *  the embedded documents in the file (if there are any).
-	 * If there are no embedded documents, you'll get back an
-	 *  empty array. Otherwise, you'll get one open
-	 *  {@link POITextExtractor} for each embedded file.
-	 */
-	public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
-	   // All the embedded directories we spotted
-		ArrayList<Entry> dirs = new ArrayList<Entry>();
-		// For anything else not directly held in as a POIFS directory
-		ArrayList<InputStream> nonPOIFS = new ArrayList<InputStream>();
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     */
+    public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+        // All the embedded directories we spotted
+        ArrayList<Entry> dirs = new ArrayList<Entry>();
+        // For anything else not directly held in as a POIFS directory
+        ArrayList<InputStream> nonPOIFS = new ArrayList<InputStream>();

-      // Find all the embedded directories
-		DirectoryEntry root = ext.getRoot();
-		if(root == null) {
-			throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
-		}
+        // Find all the embedded directories
+        DirectoryEntry root = ext.getRoot();
+        if (root == null) {
+            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
+        }

-		if(ext instanceof ExcelExtractor) {
-			// These are in MBD... under the root
-			Iterator<Entry> it = root.getEntries();
-			while(it.hasNext()) {
-				Entry entry = it.next();
-				if(entry.getName().startsWith("MBD")) {
-					dirs.add(entry);
-				}
-			}
-		} else if(ext instanceof WordExtractor) {
-			// These are in ObjectPool -> _... under the root
-			try {
-				DirectoryEntry op = (DirectoryEntry)
-				        root.getEntry("ObjectPool");
-				Iterator<Entry> it = op.getEntries();
-				while(it.hasNext()) {
-					Entry entry = it.next();
-					if(entry.getName().startsWith("_")) {
-						dirs.add(entry);
-					}
-				}
-			} catch(FileNotFoundException e) {
+        if (ext instanceof ExcelExtractor) {
+            // These are in MBD... under the root
+            Iterator<Entry> it = root.getEntries();
+            while (it.hasNext()) {
+                Entry entry = it.next();
+                if (entry.getName().startsWith("MBD")) {
+                    dirs.add(entry);
+                }
+            }
+        } else if (ext instanceof WordExtractor) {
+            // These are in ObjectPool -> _... under the root
+            try {
+                DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
+                Iterator<Entry> it = op.getEntries();
+                while (it.hasNext()) {
+                    Entry entry = it.next();
+                    if (entry.getName().startsWith("_")) {
+                        dirs.add(entry);
+                    }
+                }
+            } catch (FileNotFoundException e) {
                // ignored here
            }
-		//} else if(ext instanceof PowerPointExtractor) {
-			// Tricky, not stored directly in poifs
-			// TODO
-		} else if(ext instanceof OutlookTextExtactor) {
-		   // Stored in the Attachment blocks
-		   MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
-		   for(AttachmentChunks attachment : msg.getAttachmentFiles()) {
-		      if(attachment.attachData != null) {
-   		         byte[] data = attachment.attachData.getValue();
-   		         nonPOIFS.add( new ByteArrayInputStream(data) );
-		      } else if(attachment.attachmentDirectory != null) {
-		          dirs.add(attachment.attachmentDirectory.getDirectory());
-		      }
-		   }
-		}
+        //} else if(ext instanceof PowerPointExtractor) {
+            // Tricky, not stored directly in poifs
+            // TODO
+        } else if (ext instanceof OutlookTextExtactor) {
+            // Stored in the Attachment blocks
+            MAPIMessage msg = ((OutlookTextExtactor)ext).getMAPIMessage();
+            for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
+                if (attachment.attachData != null) {
+                    byte[] data = attachment.attachData.getValue();
+                    nonPOIFS.add( new ByteArrayInputStream(data) );
+                } else if (attachment.attachmentDirectory != null) {
+                    dirs.add(attachment.attachmentDirectory.getDirectory());
+                }
+            }
+        }

-		// Create the extractors
-		if(dirs.size() == 0 && nonPOIFS.size() == 0){
-			return new POITextExtractor[0];
-		}
+        // Create the extractors
+        if (dirs.size() == 0 && nonPOIFS.size() == 0){
+            return new POITextExtractor[0];
+        }

-		ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
+        ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
        for (Entry dir : dirs) {
-            e.add(createExtractor(
-                    (DirectoryNode) dir
-            ));
+            e.add(createExtractor((DirectoryNode) dir));
        }
        for (InputStream nonPOIF : nonPOIFS) {
            try {
-                e.add(createExtractor(nonPOIF));
+                 e.add(createExtractor(nonPOIF));
            } catch (IllegalArgumentException ie) {
                // Ignore, just means it didn't contain
                //  a format we support as yet
            } catch (XmlException xe) {
-                throw new IOException(xe.getMessage());
+                 throw new IOException(xe.getMessage());
            } catch (OpenXML4JException oe) {
-                throw new IOException(oe.getMessage());
+                 throw new IOException(oe.getMessage());
            }
        }
-		return e.toArray(new POITextExtractor[e.size()]);
-	}
+        return e.toArray(new POITextExtractor[e.size()]);
+    }

-	/**
-	 * Returns an array of text extractors, one for each of
-	 *  the embedded documents in the file (if there are any).
-	 * If there are no embedded documents, you'll get back an
-	 *  empty array. Otherwise, you'll get one open
-	 *  {@link POITextExtractor} for each embedded file.
-	 */
-	public static POITextExtractor[] getEmbededDocsTextExtractors(@SuppressWarnings("UnusedParameters") POIXMLTextExtractor ext) {
-		throw new IllegalStateException("Not yet supported");
-	}
+    /**
+     * Returns an array of text extractors, one for each of
+     *  the embedded documents in the file (if there are any).
+     * If there are no embedded documents, you'll get back an
+     *  empty array. Otherwise, you'll get one open
+     *  {@link POITextExtractor} for each embedded file.
+     */
+    @SuppressWarnings("UnusedParameters")
+    public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
+        throw new IllegalStateException("Not yet supported");
+    }
 }