Check for null in IOUtils.closeQuietly() to not log this unnecessarily
Add coverage for some more methods in ExtractorFactory Fix some IntelliJ warnings git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1736146 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dd308cd87e
commit
d806c2ac2b
@ -79,7 +79,7 @@ public final class IOUtils {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream(length == Integer.MAX_VALUE ? 4096 : length);
|
||||
|
||||
byte[] buffer = new byte[4096];
|
||||
int totalBytes = 0, readBytes = 0;
|
||||
int totalBytes = 0, readBytes;
|
||||
do {
|
||||
readBytes = stream.read(buffer, 0, Math.min(buffer.length, length-totalBytes));
|
||||
totalBytes += Math.max(readBytes,0);
|
||||
@ -218,6 +218,11 @@ public final class IOUtils {
|
||||
* resource to close
|
||||
*/
|
||||
public static void closeQuietly( final Closeable closeable ) {
|
||||
// no need to log a NullPointerException here
|
||||
if(closeable == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
closeable.close();
|
||||
} catch ( Exception exc ) {
|
||||
|
@ -30,7 +30,6 @@ import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
|
||||
@ -52,6 +51,7 @@ import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.apache.poi.poifs.filesystem.*;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
|
||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
||||
@ -67,6 +67,7 @@ import org.apache.xmlbeans.XmlException;
|
||||
* Figures out the correct POITextExtractor for your supplied
|
||||
* document, and returns it.
|
||||
*/
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public class ExtractorFactory {
|
||||
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
|
||||
protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
|
||||
@ -136,39 +137,33 @@ public class ExtractorFactory {
|
||||
return extractor;
|
||||
} catch (OfficeXmlFileException e) {
|
||||
// ensure file-handle release
|
||||
if(fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
|
||||
} catch (NotOLE2FileException ne) {
|
||||
// ensure file-handle release
|
||||
if(fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
|
||||
} catch (OpenXML4JException e) {
|
||||
// ensure file-handle release
|
||||
if(fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw e;
|
||||
} catch (XmlException e) {
|
||||
// ensure file-handle release
|
||||
if(fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw e;
|
||||
} catch (IOException e) {
|
||||
// ensure file-handle release
|
||||
if(fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw e;
|
||||
} catch (RuntimeException e) {
|
||||
// ensure file-handle release
|
||||
if(fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
@ -280,21 +275,21 @@ public class ExtractorFactory {
|
||||
}
|
||||
}
|
||||
|
||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
// Only ever an OLE2 one from the root of the FS
|
||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
||||
}
|
||||
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
||||
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
// Only ever an OLE2 one from the root of the FS
|
||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
||||
}
|
||||
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
||||
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
// Only ever an OLE2 one from the root of the FS
|
||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
||||
}
|
||||
|
||||
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException,
|
||||
InvalidFormatException, OpenXML4JException, XmlException
|
||||
OpenXML4JException, XmlException
|
||||
{
|
||||
// Look for certain entries in the stream, to figure it
|
||||
// out from
|
||||
@ -359,7 +354,7 @@ public class ExtractorFactory {
|
||||
* empty array. Otherwise, you'll get one open
|
||||
* {@link POITextExtractor} for each embedded file.
|
||||
*/
|
||||
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
|
||||
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
|
||||
// All the embded directories we spotted
|
||||
ArrayList<Entry> dirs = new ArrayList<Entry>();
|
||||
// For anything else not directly held in as a POIFS directory
|
||||
@ -392,7 +387,9 @@ public class ExtractorFactory {
|
||||
dirs.add(entry);
|
||||
}
|
||||
}
|
||||
} catch(FileNotFoundException e) {}
|
||||
} catch(FileNotFoundException e) {
|
||||
// ignored here
|
||||
}
|
||||
} else if(ext instanceof PowerPointExtractor) {
|
||||
// Tricky, not stored directly in poifs
|
||||
// TODO
|
||||
@ -415,23 +412,23 @@ public class ExtractorFactory {
|
||||
}
|
||||
|
||||
ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
|
||||
for(int i=0; i<dirs.size(); i++) {
|
||||
e.add( createExtractor(
|
||||
(DirectoryNode)dirs.get(i)
|
||||
) );
|
||||
}
|
||||
for(int i=0; i<nonPOIFS.size(); i++) {
|
||||
try {
|
||||
e.add( createExtractor(nonPOIFS.get(i)) );
|
||||
} catch(IllegalArgumentException ie) {
|
||||
// Ignore, just means it didn't contain
|
||||
// a format we support as yet
|
||||
} catch(XmlException xe) {
|
||||
throw new IOException(xe.getMessage());
|
||||
} catch(OpenXML4JException oe) {
|
||||
throw new IOException(oe.getMessage());
|
||||
}
|
||||
}
|
||||
for (Entry dir : dirs) {
|
||||
e.add(createExtractor(
|
||||
(DirectoryNode) dir
|
||||
));
|
||||
}
|
||||
for (InputStream nonPOIF : nonPOIFS) {
|
||||
try {
|
||||
e.add(createExtractor(nonPOIF));
|
||||
} catch (IllegalArgumentException ie) {
|
||||
// Ignore, just means it didn't contain
|
||||
// a format we support as yet
|
||||
} catch (XmlException xe) {
|
||||
throw new IOException(xe.getMessage());
|
||||
} catch (OpenXML4JException oe) {
|
||||
throw new IOException(oe.getMessage());
|
||||
}
|
||||
}
|
||||
return e.toArray(new POITextExtractor[e.size()]);
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,9 @@ import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackageAccess;
|
||||
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
|
||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
|
||||
@ -174,7 +176,7 @@ public class TestExtractorFactory {
|
||||
|
||||
// TODO Support OOXML-Strict, see bug #57699
|
||||
try {
|
||||
extractor = ExtractorFactory.createExtractor(xlsxStrict);
|
||||
/*extractor =*/ ExtractorFactory.createExtractor(xlsxStrict);
|
||||
fail("OOXML-Strict isn't yet supported");
|
||||
} catch (POIXMLException e) {
|
||||
// Expected, for now
|
||||
@ -467,7 +469,7 @@ public class TestExtractorFactory {
|
||||
ExtractorFactory.createExtractor(stream);
|
||||
fail();
|
||||
} finally {
|
||||
stream.close();
|
||||
IOUtils.closeQuietly(stream);
|
||||
}
|
||||
} catch(IllegalArgumentException e) {
|
||||
// Good
|
||||
@ -555,6 +557,88 @@ public class TestExtractorFactory {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testOPOIFS() throws Exception {
|
||||
// Excel
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(xls)))
|
||||
instanceof ExcelExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
|
||||
);
|
||||
|
||||
// Word
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc)))
|
||||
instanceof WordExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
|
||||
);
|
||||
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc6)))
|
||||
instanceof Word6Extractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc6))).getText().length() > 20
|
||||
);
|
||||
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc95)))
|
||||
instanceof Word6Extractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc95))).getText().length() > 120
|
||||
);
|
||||
|
||||
// PowerPoint
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(ppt)))
|
||||
instanceof PowerPointExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
|
||||
);
|
||||
|
||||
// Visio
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(vsd)))
|
||||
instanceof VisioTextExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
|
||||
);
|
||||
|
||||
// Publisher
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(pub)))
|
||||
instanceof PublisherTextExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
|
||||
);
|
||||
|
||||
// Outlook msg
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(msg)))
|
||||
instanceof OutlookTextExtactor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
|
||||
);
|
||||
|
||||
// Text
|
||||
try {
|
||||
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(txt)));
|
||||
fail();
|
||||
} catch(IOException e) {
|
||||
// Good
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPackage() throws Exception {
|
||||
// Excel
|
||||
@ -721,13 +805,13 @@ public class TestExtractorFactory {
|
||||
|
||||
assertEquals(6, embeds.length);
|
||||
int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX;
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
for (POITextExtractor embed : embeds) {
|
||||
assertTrue(embed.getText().length() > 20);
|
||||
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
if (embed instanceof PowerPointExtractor) numPpt++;
|
||||
else if (embed instanceof ExcelExtractor) numXls++;
|
||||
else if (embed instanceof WordExtractor) numWord++;
|
||||
else if (embed instanceof OutlookTextExtactor) numMsg++;
|
||||
}
|
||||
assertEquals(2, numPpt);
|
||||
assertEquals(2, numXls);
|
||||
@ -742,12 +826,12 @@ public class TestExtractorFactory {
|
||||
|
||||
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
|
||||
assertEquals(4, embeds.length);
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
for (POITextExtractor embed : embeds) {
|
||||
assertTrue(embed.getText().length() > 20);
|
||||
if (embed instanceof PowerPointExtractor) numPpt++;
|
||||
else if (embed instanceof ExcelExtractor) numXls++;
|
||||
else if (embed instanceof WordExtractor) numWord++;
|
||||
else if (embed instanceof OutlookTextExtactor) numMsg++;
|
||||
}
|
||||
assertEquals(1, numPpt);
|
||||
assertEquals(2, numXls);
|
||||
@ -762,13 +846,13 @@ public class TestExtractorFactory {
|
||||
|
||||
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; numWordX = 0;
|
||||
assertEquals(3, embeds.length);
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
else if(embeds[i] instanceof XWPFWordExtractor) numWordX++;
|
||||
for (POITextExtractor embed : embeds) {
|
||||
assertTrue(embed.getText().length() > 20);
|
||||
if (embed instanceof PowerPointExtractor) numPpt++;
|
||||
else if (embed instanceof ExcelExtractor) numXls++;
|
||||
else if (embed instanceof WordExtractor) numWord++;
|
||||
else if (embed instanceof OutlookTextExtactor) numMsg++;
|
||||
else if (embed instanceof XWPFWordExtractor) numWordX++;
|
||||
}
|
||||
assertEquals(1, numPpt);
|
||||
assertEquals(1, numXls);
|
||||
@ -784,12 +868,12 @@ public class TestExtractorFactory {
|
||||
|
||||
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
|
||||
assertEquals(1, embeds.length);
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
for (POITextExtractor embed : embeds) {
|
||||
assertTrue(embed.getText().length() > 20);
|
||||
if (embed instanceof PowerPointExtractor) numPpt++;
|
||||
else if (embed instanceof ExcelExtractor) numXls++;
|
||||
else if (embed instanceof WordExtractor) numWord++;
|
||||
else if (embed instanceof OutlookTextExtactor) numMsg++;
|
||||
}
|
||||
assertEquals(0, numPpt);
|
||||
assertEquals(0, numXls);
|
||||
@ -804,12 +888,12 @@ public class TestExtractorFactory {
|
||||
|
||||
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
|
||||
assertEquals(1, embeds.length);
|
||||
for(int i=0; i<embeds.length; i++) {
|
||||
assertTrue(embeds[i].getText().length() > 20);
|
||||
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
|
||||
else if(embeds[i] instanceof ExcelExtractor) numXls++;
|
||||
else if(embeds[i] instanceof WordExtractor) numWord++;
|
||||
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
|
||||
for (POITextExtractor embed : embeds) {
|
||||
assertTrue(embed.getText().length() > 20);
|
||||
if (embed instanceof PowerPointExtractor) numPpt++;
|
||||
else if (embed instanceof ExcelExtractor) numXls++;
|
||||
else if (embed instanceof WordExtractor) numWord++;
|
||||
else if (embed instanceof OutlookTextExtactor) numMsg++;
|
||||
}
|
||||
assertEquals(0, numPpt);
|
||||
assertEquals(0, numXls);
|
||||
@ -923,11 +1007,24 @@ public class TestExtractorFactory {
|
||||
* "No supported documents found in the OLE2 stream"
|
||||
*/
|
||||
@Test
|
||||
public void a() throws Exception {
|
||||
public void bug59074() throws Exception {
|
||||
try {
|
||||
ExtractorFactory.createExtractor(
|
||||
POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
|
||||
fail("Old excel formats not supported via ExtractorFactory");
|
||||
} catch (OldExcelFormatException e) {}
|
||||
} catch (OldExcelFormatException e) {
|
||||
// expected here
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetEmbeddedFromXMLExtractor() {
|
||||
try {
|
||||
// currently not implemented
|
||||
ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor)null);
|
||||
fail("Unsupported currently");
|
||||
} catch (IllegalStateException e) {
|
||||
// expected here
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user