diff --git a/src/integrationtest/org/apache/poi/HeapDump.java b/src/integrationtest/org/apache/poi/HeapDump.java new file mode 100644 index 000000000..625527d84 --- /dev/null +++ b/src/integrationtest/org/apache/poi/HeapDump.java @@ -0,0 +1,62 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi; + +import com.sun.management.HotSpotDiagnosticMXBean; + +import java.io.IOException; +import java.lang.management.ManagementFactory; + +public class HeapDump { + // This is the name of the HotSpot Diagnostic MBean + private static final String HOTSPOT_BEAN_NAME = + "com.sun.management:type=HotSpotDiagnostic"; + + // field to store the hotspot diagnostic MBean + private static volatile HotSpotDiagnosticMXBean hotspotMBean; + + /** + * Call this method from your application whenever you + * want to dump the heap snapshot into a file. + * + * @param fileName name of the heap dump file + * @param live flag that tells whether to dump + * only the live objects + */ + public static void dumpHeap(String fileName, boolean live) throws IOException { + // initialize hotspot diagnostic MBean + initHotspotMBean(); + hotspotMBean.dumpHeap(fileName, live); + } + + // initialize the hotspot diagnostic MBean field + private static void initHotspotMBean() throws IOException { + if (hotspotMBean == null) { + synchronized (HeapDump.class) { + if (hotspotMBean == null) { + hotspotMBean = getHotspotMBean(); + } + } + } + } + + // get the hotspot diagnostic MBean from the platform MBean server + private static HotSpotDiagnosticMXBean getHotspotMBean() throws IOException { + return ManagementFactory.newPlatformMXBeanProxy(ManagementFactory.getPlatformMBeanServer(), + HOTSPOT_BEAN_NAME, HotSpotDiagnosticMXBean.class); + } +} diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index 9b49cf771..8bd34acda 100644 --- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -81,9 +81,9 @@ public abstract class AbstractFileHandler implements FileHandler { POITextExtractor extractor = ExtractorFactory.createExtractor(file); try { - assertNotNull(extractor); + assertNotNull("Should get a POITextExtractor but had none for file " + file, extractor); - assertNotNull(extractor.getText()); + assertNotNull("Should get some text but had none for file " + file, extractor.getText()); // also try metadata @SuppressWarnings("resource") diff --git a/src/integrationtest/org/apache/poi/stress/FileHandler.java b/src/integrationtest/org/apache/poi/stress/FileHandler.java index 8b65cfa47..c1888e731 100644 --- a/src/integrationtest/org/apache/poi/stress/FileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/FileHandler.java @@ -31,8 +31,8 @@ public interface FileHandler { * * Closing is handled by the framework outside this call. * - * @param stream - * @throws Exception + * @param stream The input stream to read the file from. + * @throws Exception If an error happens in the file-specific handler */ void handleFile(InputStream stream) throws Exception; diff --git a/src/integrationtest/org/apache/poi/stress/HSLFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSLFFileHandler.java index db9548c41..b9cff7650 100644 --- a/src/integrationtest/org/apache/poi/stress/HSLFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HSLFFileHandler.java @@ -19,9 +19,7 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; +import java.io.*; import org.apache.poi.hslf.record.Record; import org.apache.poi.hslf.usermodel.HSLFSlideShow; @@ -39,30 +37,65 @@ public class HSLFFileHandler extends SlideShowHandler { Record[] records = slide.getRecords(); assertNotNull(records); for(Record record : records) { + assertNotNull("Found a record which was null", record); assertTrue(record.getRecordType() >= 0); } - + handlePOIDocument(slide); - + HSLFSlideShow ss = new HSLFSlideShow(slide); handleSlideShow(ss); } - + + @Test + public void testOne() throws Exception { + testOneFile(new File("test-data/slideshow/54880_chinese.ppt")); + } + // a test-case to test this locally without executing the full TestAllFiles @Override @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/hpsf/Test_Humor-Generation.ppt"); - try { - handleFile(stream); - } finally { - stream.close(); + File[] files = new File("test-data/slideshow/").listFiles(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return name.endsWith(".ppt"); + } + }); + assertNotNull(files); + + System.out.println("Testing " + files.length + " files"); + + for(File file : files) { + try { + testOneFile(file); + } catch (Throwable e) { + e.printStackTrace(); + } } + } + + private void testOneFile(File file) throws Exception { + System.out.println(file); + + //System.setProperty("org.apache.poi.util.POILogger", "org.apache.poi.util.SystemOutLogger"); + InputStream stream = new FileInputStream(file); + try { + handleFile(stream); + } finally { + stream.close(); + } + + handleExtracting(file); } - // a test-case to test this locally without executing the full TestAllFiles - @Test - public void testExtractor() throws Exception { - handleExtracting(new File("test-data/slideshow/ae.ac.uaeu.faculty_nafaachbili_GeomLec1.pptx")); + public static void main(String[] args) throws Exception { + System.setProperty("org.apache.poi.util.POILogger", "org.apache.poi.util.SystemOutLogger"); + InputStream stream = new FileInputStream(args[0]); + try { + new HSLFFileHandler().handleFile(stream); + } finally { + stream.close(); + } } } diff --git a/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java index 4f7bd4b5d..1d40e6f4b 100644 --- a/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java @@ -18,6 +18,7 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -75,11 +76,14 @@ public class HSMFFileHandler extends POIFSFileHandler { @Override @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/hsmf/example_received_regular.msg"); + File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg"); + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally { stream.close(); } + + handleExtracting(file); } } \ No newline at end of file diff --git a/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java index bef4043b0..2743738eb 100644 --- a/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java @@ -38,7 +38,7 @@ public class HSSFFileHandler extends SpreadsheetHandler { // TODO: some documents fail currently... // Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating - // IntesectionPtg. However it is still not capable of parsing it. + // IntersectionPtg. However it is still not capable of parsing it. // So FormulaEvalTestData.xls now contains a few formulas that produce errors here. //HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb); //evaluator.evaluateAll(); @@ -100,17 +100,14 @@ public class HSSFFileHandler extends SpreadsheetHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/spreadsheet/49219.xls"); + File file = new File("test-data/spreadsheet/49219.xls"); + + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally { stream.close(); } + handleExtracting(file); } - - // a test-case to test this locally without executing the full TestAllFiles - @Test - public void testExtractor() throws Exception { - handleExtracting(new File("test-data/spreadsheet/BOOK_in_capitals.xls")); - } } \ No newline at end of file diff --git a/src/integrationtest/org/apache/poi/stress/OPCFileHandler.java b/src/integrationtest/org/apache/poi/stress/OPCFileHandler.java index 12493e24c..ca019ae51 100644 --- a/src/integrationtest/org/apache/poi/stress/OPCFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/OPCFileHandler.java @@ -57,7 +57,7 @@ public class OPCFileHandler extends AbstractFileHandler { @Override public void handleExtracting(File file) throws Exception { - // text-extraction is not possible currenlty for these types of files + // text-extraction is not possible currently for these types of files } // a test-case to test this locally without executing the full TestAllFiles diff --git a/src/integrationtest/org/apache/poi/stress/SlideShowHandler.java b/src/integrationtest/org/apache/poi/stress/SlideShowHandler.java index d76e959b8..5241b8089 100644 --- a/src/integrationtest/org/apache/poi/stress/SlideShowHandler.java +++ b/src/integrationtest/org/apache/poi/stress/SlideShowHandler.java @@ -50,7 +50,7 @@ public abstract class SlideShowHandler extends POIFSFileHandler { readContent(ss); - // read in the writen file + // read in the written file SlideShow read = SlideShowFactory.create(new ByteArrayInputStream(out.toByteArray())); try { assertNotNull(read); diff --git a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java index 421f842e1..ce5e0c040 100644 --- a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java @@ -69,17 +69,14 @@ public class XSLFFileHandler extends SlideShowHandler { @Override @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/slideshow/ae.ac.uaeu.faculty_nafaachbili_GeomLec1.pptx"); + File file = new File("test-data/slideshow/ae.ac.uaeu.faculty_nafaachbili_GeomLec1.pptx"); + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally { stream.close(); } - } - // a test-case to test this locally without executing the full TestAllFiles - @Test - public void testExtractor() throws Exception { - handleExtracting(new File("test-data/slideshow/ae.ac.uaeu.faculty_nafaachbili_GeomLec1.pptx")); + handleExtracting(file); } -} \ No newline at end of file +} diff --git a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java index 111efe48c..ba2137770 100644 --- a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java @@ -16,25 +16,6 @@ ==================================================================== */ package org.apache.poi.stress; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; -import java.io.BufferedInputStream; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.PrintStream; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Locale; -import java.util.Set; - import org.apache.poi.POIXMLException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException; @@ -56,12 +37,18 @@ public class XSSFFileHandler extends SpreadsheetHandler { // ignore password protected files if (POIXMLDocumentHandler.isEncrypted(stream)) return; - ByteArrayOutputStream out = new ByteArrayOutputStream(); - IOUtils.copy(stream, out); - - final byte[] bytes = out.toByteArray(); final XSSFWorkbook wb; - wb = new XSSFWorkbook(new ByteArrayInputStream(bytes)); + + // make sure the potentially large byte-array is freed up quickly again + { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IOUtils.copy(stream, out); + final byte[] bytes = out.toByteArray(); + + checkXSSFReader(OPCPackage.open(new ByteArrayInputStream(bytes))); + + wb = new XSSFWorkbook(new ByteArrayInputStream(bytes)); + } // use the combined handler for HSSF/XSSF handleWorkbook(wb); @@ -76,9 +63,8 @@ public class XSSFFileHandler extends SpreadsheetHandler { // and finally ensure that exporting to XML works exportToXML(wb); - checkXSSFReader(OPCPackage.open(new ByteArrayInputStream(bytes))); - - wb.close(); + // this allows to trigger a heap-dump at this point to see which memory is still allocated + //HeapDump.dumpHeap("/tmp/poi.hprof", false); } @@ -183,18 +169,16 @@ public class XSSFFileHandler extends SpreadsheetHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new BufferedInputStream(new FileInputStream("test-data/spreadsheet/ref-56737.xlsx")); + File file = new File("test-data/spreadsheet/ref-56737.xlsx"); + + InputStream stream = new BufferedInputStream(new FileInputStream(file)); try { handleFile(stream); } finally { stream.close(); } - } - // a test-case to test this locally without executing the full TestAllFiles - @Test - public void testExtractor() throws Exception { - handleExtracting(new File("test-data/spreadsheet/ref-56737.xlsx")); + handleExtracting(file); } @Test