Check for null in IOUtils.closeQuietly() to not log this unnecessarily

Add coverage for some  more methods in ExtractorFactory
Fix some IntelliJ warnings

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1736146 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2016-03-22 07:51:39 +00:00
parent dd308cd87e
commit d806c2ac2b
3 changed files with 178 additions and 79 deletions

View File

@ -79,7 +79,7 @@ public final class IOUtils {
ByteArrayOutputStream baos = new ByteArrayOutputStream(length == Integer.MAX_VALUE ? 4096 : length);
byte[] buffer = new byte[4096];
int totalBytes = 0, readBytes = 0;
int totalBytes = 0, readBytes;
do {
readBytes = stream.read(buffer, 0, Math.min(buffer.length, length-totalBytes));
totalBytes += Math.max(readBytes,0);
@ -218,6 +218,11 @@ public final class IOUtils {
* resource to close
*/
public static void closeQuietly( final Closeable closeable ) {
// no need to log a NullPointerException here
if(closeable == null) {
return;
}
try {
closeable.close();
} catch ( Exception exc ) {

View File

@ -30,7 +30,6 @@ import java.util.Iterator;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
@ -52,6 +51,7 @@ import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XSLFRelation;
@ -67,6 +67,7 @@ import org.apache.xmlbeans.XmlException;
* Figures out the correct POITextExtractor for your supplied
* document, and returns it.
*/
@SuppressWarnings("WeakerAccess")
public class ExtractorFactory {
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
@ -136,39 +137,33 @@ public class ExtractorFactory {
return extractor;
} catch (OfficeXmlFileException e) {
// ensure file-handle release
if(fs != null) {
fs.close();
}
IOUtils.closeQuietly(fs);
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
} catch (NotOLE2FileException ne) {
// ensure file-handle release
if(fs != null) {
fs.close();
}
IOUtils.closeQuietly(fs);
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
} catch (OpenXML4JException e) {
// ensure file-handle release
if(fs != null) {
fs.close();
}
IOUtils.closeQuietly(fs);
throw e;
} catch (XmlException e) {
// ensure file-handle release
if(fs != null) {
fs.close();
}
IOUtils.closeQuietly(fs);
throw e;
} catch (IOException e) {
// ensure file-handle release
if(fs != null) {
fs.close();
}
IOUtils.closeQuietly(fs);
throw e;
} catch (RuntimeException e) {
// ensure file-handle release
if(fs != null) {
fs.close();
}
IOUtils.closeQuietly(fs);
throw e;
}
}
@ -280,21 +275,21 @@ public class ExtractorFactory {
}
}
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
// Only ever an OLE2 one from the root of the FS
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
}
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
// Only ever an OLE2 one from the root of the FS
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
}
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
// Only ever an OLE2 one from the root of the FS
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
}
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException,
InvalidFormatException, OpenXML4JException, XmlException
OpenXML4JException, XmlException
{
// Look for certain entries in the stream, to figure it
// out from
@ -359,7 +354,7 @@ public class ExtractorFactory {
* empty array. Otherwise, you'll get one open
* {@link POITextExtractor} for each embedded file.
*/
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
// All the embded directories we spotted
ArrayList<Entry> dirs = new ArrayList<Entry>();
// For anything else not directly held in as a POIFS directory
@ -392,7 +387,9 @@ public class ExtractorFactory {
dirs.add(entry);
}
}
} catch(FileNotFoundException e) {}
} catch(FileNotFoundException e) {
// ignored here
}
} else if(ext instanceof PowerPointExtractor) {
// Tricky, not stored directly in poifs
// TODO
@ -415,23 +412,23 @@ public class ExtractorFactory {
}
ArrayList<POITextExtractor> e = new ArrayList<POITextExtractor>();
for(int i=0; i<dirs.size(); i++) {
e.add( createExtractor(
(DirectoryNode)dirs.get(i)
) );
}
for(int i=0; i<nonPOIFS.size(); i++) {
try {
e.add( createExtractor(nonPOIFS.get(i)) );
} catch(IllegalArgumentException ie) {
// Ignore, just means it didn't contain
// a format we support as yet
} catch(XmlException xe) {
throw new IOException(xe.getMessage());
} catch(OpenXML4JException oe) {
throw new IOException(oe.getMessage());
}
}
for (Entry dir : dirs) {
e.add(createExtractor(
(DirectoryNode) dir
));
}
for (InputStream nonPOIF : nonPOIFS) {
try {
e.add(createExtractor(nonPOIF));
} catch (IllegalArgumentException ie) {
// Ignore, just means it didn't contain
// a format we support as yet
} catch (XmlException xe) {
throw new IOException(xe.getMessage());
} catch (OpenXML4JException oe) {
throw new IOException(oe.getMessage());
}
}
return e.toArray(new POITextExtractor[e.size()]);
}

View File

@ -44,7 +44,9 @@ import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
@ -174,7 +176,7 @@ public class TestExtractorFactory {
// TODO Support OOXML-Strict, see bug #57699
try {
extractor = ExtractorFactory.createExtractor(xlsxStrict);
/*extractor =*/ ExtractorFactory.createExtractor(xlsxStrict);
fail("OOXML-Strict isn't yet supported");
} catch (POIXMLException e) {
// Expected, for now
@ -467,7 +469,7 @@ public class TestExtractorFactory {
ExtractorFactory.createExtractor(stream);
fail();
} finally {
stream.close();
IOUtils.closeQuietly(stream);
}
} catch(IllegalArgumentException e) {
// Good
@ -555,6 +557,88 @@ public class TestExtractorFactory {
}
}
@Test
public void testOPOIFS() throws Exception {
// Excel
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(xls)))
instanceof ExcelExtractor
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
);
// Word
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc)))
instanceof WordExtractor
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc6)))
instanceof Word6Extractor
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc6))).getText().length() > 20
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc95)))
instanceof Word6Extractor
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc95))).getText().length() > 120
);
// PowerPoint
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(ppt)))
instanceof PowerPointExtractor
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
);
// Visio
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(vsd)))
instanceof VisioTextExtractor
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
);
// Publisher
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(pub)))
instanceof PublisherTextExtractor
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
);
// Outlook msg
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(msg)))
instanceof OutlookTextExtactor
);
assertTrue(
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
);
// Text
try {
ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(txt)));
fail();
} catch(IOException e) {
// Good
}
}
@Test
public void testPackage() throws Exception {
// Excel
@ -721,13 +805,13 @@ public class TestExtractorFactory {
assertEquals(6, embeds.length);
int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX;
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
for (POITextExtractor embed : embeds) {
assertTrue(embed.getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
if (embed instanceof PowerPointExtractor) numPpt++;
else if (embed instanceof ExcelExtractor) numXls++;
else if (embed instanceof WordExtractor) numWord++;
else if (embed instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(2, numPpt);
assertEquals(2, numXls);
@ -742,12 +826,12 @@ public class TestExtractorFactory {
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
assertEquals(4, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
for (POITextExtractor embed : embeds) {
assertTrue(embed.getText().length() > 20);
if (embed instanceof PowerPointExtractor) numPpt++;
else if (embed instanceof ExcelExtractor) numXls++;
else if (embed instanceof WordExtractor) numWord++;
else if (embed instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(1, numPpt);
assertEquals(2, numXls);
@ -762,13 +846,13 @@ public class TestExtractorFactory {
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; numWordX = 0;
assertEquals(3, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
else if(embeds[i] instanceof XWPFWordExtractor) numWordX++;
for (POITextExtractor embed : embeds) {
assertTrue(embed.getText().length() > 20);
if (embed instanceof PowerPointExtractor) numPpt++;
else if (embed instanceof ExcelExtractor) numXls++;
else if (embed instanceof WordExtractor) numWord++;
else if (embed instanceof OutlookTextExtactor) numMsg++;
else if (embed instanceof XWPFWordExtractor) numWordX++;
}
assertEquals(1, numPpt);
assertEquals(1, numXls);
@ -784,12 +868,12 @@ public class TestExtractorFactory {
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
assertEquals(1, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
for (POITextExtractor embed : embeds) {
assertTrue(embed.getText().length() > 20);
if (embed instanceof PowerPointExtractor) numPpt++;
else if (embed instanceof ExcelExtractor) numXls++;
else if (embed instanceof WordExtractor) numWord++;
else if (embed instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(0, numPpt);
assertEquals(0, numXls);
@ -804,12 +888,12 @@ public class TestExtractorFactory {
numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
assertEquals(1, embeds.length);
for(int i=0; i<embeds.length; i++) {
assertTrue(embeds[i].getText().length() > 20);
if(embeds[i] instanceof PowerPointExtractor) numPpt++;
else if(embeds[i] instanceof ExcelExtractor) numXls++;
else if(embeds[i] instanceof WordExtractor) numWord++;
else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
for (POITextExtractor embed : embeds) {
assertTrue(embed.getText().length() > 20);
if (embed instanceof PowerPointExtractor) numPpt++;
else if (embed instanceof ExcelExtractor) numXls++;
else if (embed instanceof WordExtractor) numWord++;
else if (embed instanceof OutlookTextExtactor) numMsg++;
}
assertEquals(0, numPpt);
assertEquals(0, numXls);
@ -923,11 +1007,24 @@ public class TestExtractorFactory {
* "No supported documents found in the OLE2 stream"
*/
@Test
public void a() throws Exception {
public void bug59074() throws Exception {
try {
ExtractorFactory.createExtractor(
POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
fail("Old excel formats not supported via ExtractorFactory");
} catch (OldExcelFormatException e) {}
} catch (OldExcelFormatException e) {
// expected here
}
}
@Test
public void testGetEmbeddedFromXMLExtractor() {
try {
// currently not implemented
ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor)null);
fail("Unsupported currently");
} catch (IllegalStateException e) {
// expected here
}
}
}