diff --git a/src/java/org/apache/poi/POITextExtractor.java b/src/java/org/apache/poi/POITextExtractor.java index 0b69894d0..e18078b46 100644 --- a/src/java/org/apache/poi/POITextExtractor.java +++ b/src/java/org/apache/poi/POITextExtractor.java @@ -16,6 +16,9 @@ ==================================================================== */ package org.apache.poi; +import java.io.Closeable; +import java.io.IOException; + /** * Common Parent for Text Extractors * of POI Documents. @@ -27,7 +30,7 @@ package org.apache.poi; * @see org.apache.poi.hdgf.extractor.VisioTextExtractor * @see org.apache.poi.hwpf.extractor.WordExtractor */ -public abstract class POITextExtractor { +public abstract class POITextExtractor implements Closeable { /** The POIDocument that's open */ protected POIDocument document; @@ -61,4 +64,15 @@ public abstract class POITextExtractor { * metadata / properties, such as author and title. */ public abstract POITextExtractor getMetadataTextExtractor(); + + /** + * Allows to free resources of the Extractor as soon as + * it is not needed any more. This may include closing + * open file handles and freeing memory. + * + * The Extractor cannot be used after close has been called. + */ + public void close() throws IOException { + // nothing to do in abstract class, derived classes may perform actions. + } } diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java index eee1d25ab..eeb03f7aa 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java +++ b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java @@ -17,6 +17,8 @@ package org.apache.poi; +import java.io.IOException; + import org.apache.poi.POIXMLProperties.CoreProperties; import org.apache.poi.POIXMLProperties.CustomProperties; import org.apache.poi.POIXMLProperties.ExtendedProperties; @@ -75,4 +77,16 @@ public abstract class POIXMLTextExtractor extends POITextExtractor { public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { return new POIXMLPropertiesTextExtractor(_document); } + + @Override + public void close() throws IOException { + // e.g. XSSFEventBaseExcelExtractor passes a null-document + if(_document != null) { + OPCPackage pkg = _document.getPackage(); + if(pkg != null) { + pkg.close(); + } + } + super.close(); + } } diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java index efc42cff5..0c31fe04d 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java @@ -191,6 +191,15 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { } } + @Override + public void close() throws IOException { + if (container != null) { + container.close(); + container = null; + } + super.close(); + } + protected class SheetTextExtractor implements SheetContentsHandler { private final StringBuffer output; private boolean firstCellOfRow = true; diff --git a/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java b/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java index 8ad2f78c4..0df6ccf81 100644 --- a/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java @@ -46,6 +46,9 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertTrue(text.contains("LastModifiedBy = Yury Batrakov")); assertTrue(cText.contains("LastModifiedBy = Yury Batrakov")); + + textExt.close(); + ext.close(); } public void testCore() throws Exception { @@ -63,6 +66,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertTrue(text.contains("LastModifiedBy = Yury Batrakov")); assertTrue(cText.contains("LastModifiedBy = Yury Batrakov")); + + ext.close(); } public void testExtended() throws Exception { @@ -82,6 +87,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertTrue(text.contains("Company = Mera")); assertTrue(eText.contains("Application = Microsoft Excel")); assertTrue(eText.contains("Company = Mera")); + + ext.close(); } public void testCustom() throws Exception { @@ -99,6 +106,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertTrue(text.contains("description = another value")); assertTrue(cText.contains("description = another value")); + + ext.close(); } /** @@ -118,5 +127,7 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertFalse(text.contains("Created =")); // With date is null assertTrue(text.contains("CreatedString = ")); // Via string is blank assertTrue(text.contains("LastModifiedBy = IT Client Services")); + + ext.close(); } } diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java index cb7ad736f..35ee3f1cb 100644 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java @@ -43,8 +43,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { * Get text out of the simple file */ public void testGetSimpleText() throws Exception { - new XSLFPowerPointExtractor(xmlA); - new XSLFPowerPointExtractor(pkg); + new XSLFPowerPointExtractor(xmlA).close(); + new XSLFPowerPointExtractor(pkg).close(); XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xmlA); @@ -148,6 +148,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { assertEquals( "\n\n\n\n", text ); + + extractor.close(); } public void testGetComments() throws Exception { @@ -165,6 +167,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { // Check the authors came through too assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01")); + + extractor.close(); } public void testGetMasterText() throws Exception { @@ -206,6 +210,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { "This text comes from the Master Slide\n" , text ); + + extractor.close(); } public void testTable() throws Exception { @@ -219,6 +225,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { // Check comments are there assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST")); + + extractor.close(); } /** @@ -267,6 +275,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { "Text missing for " + filename + "\n" + text, text.contains("Mystery") ); + + extractor.close(); } } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java index eade64f2f..eac3700e7 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java @@ -30,12 +30,10 @@ import org.apache.poi.xssf.XSSFTestDataSamples; /** * Tests for {@link XSSFEventBasedExcelExtractor} */ -public final class TestXSSFEventBasedExcelExtractor extends TestCase { - - - private static final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { - return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples. - openSamplePackage(sampleName)); +public class TestXSSFEventBasedExcelExtractor extends TestCase { + protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { + return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples. + openSamplePackage(sampleName)); } /** @@ -97,6 +95,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { CHUNK2 + "Sheet3\n" , text); + + extractor.close(); } public void testGetComplexText() throws Exception { @@ -112,6 +112,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { "Avgtxfull\n" + "(iii) AVERAGE TAX RATES ON ANNUAL" )); + + extractor.close(); } public void testInlineStrings() throws Exception { @@ -134,6 +136,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { // Formulas assertTrue("Unable to find expected word in text\n" + text, text.contains("A2")); assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2")); + + extractor.close(); } /** @@ -159,5 +163,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { Matcher m = pattern.matcher(text); assertTrue(m.matches()); } + + ole2Extractor.close(); + ooxmlExtractor.close(); } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java new file mode 100644 index 000000000..05e790fd5 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java @@ -0,0 +1,29 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xssf.extractor; + +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hssf.HSSFTestDataSamples; + + +public class TestXSSFEventBasedExcelExtractorUsingFactory extends TestXSSFEventBasedExcelExtractor { + @Override + protected final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { + ExtractorFactory.setAllThreadsPreferEventExtractors(true); + return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName)); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java index 2d14cd535..bc86d6f9b 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java @@ -17,6 +17,7 @@ package org.apache.poi.xssf.extractor; +import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -30,17 +31,16 @@ import org.apache.poi.xssf.XSSFTestDataSamples; /** * Tests for {@link XSSFExcelExtractor} */ -public final class TestXSSFExcelExtractor extends TestCase { - - - private static final XSSFExcelExtractor getExtractor(String sampleName) { +public class TestXSSFExcelExtractor extends TestCase { + protected XSSFExcelExtractor getExtractor(String sampleName) { return new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook(sampleName)); } /** * Get text out of the simple file + * @throws IOException */ - public void testGetSimpleText() { + public void testGetSimpleText() throws IOException { // a very simple file XSSFExcelExtractor extractor = getExtractor("sample.xlsx"); extractor.getText(); @@ -96,9 +96,11 @@ public final class TestXSSFExcelExtractor extends TestCase { CHUNK2 + "Sheet3\n" , text); + + extractor.close(); } - public void testGetComplexText() { + public void testGetComplexText() throws IOException { // A fairly complex file XSSFExcelExtractor extractor = getExtractor("AverageTaxRates.xlsx"); extractor.getText(); @@ -112,14 +114,17 @@ public final class TestXSSFExcelExtractor extends TestCase { "Avgtxfull\n" + "null\t(iii) AVERAGE TAX RATES ON ANNUAL" )); + + extractor.close(); } /** * Test that we return pretty much the same as * ExcelExtractor does, when we're both passed * the same file, just saved as xls and xlsx + * @throws IOException */ - public void testComparedToOLE2() { + public void testComparedToOLE2() throws IOException { // A fairly simple file - ooxml XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx"); @@ -137,12 +142,16 @@ public final class TestXSSFExcelExtractor extends TestCase { Matcher m = pattern.matcher(text); assertTrue(m.matches()); } + + ole2Extractor.close(); + ooxmlExtractor.close(); } /** * From bug #45540 + * @throws IOException */ - public void testHeaderFooter() { + public void testHeaderFooter() throws IOException { String[] files = new String[] { "45540_classic_Header.xlsx", "45540_form_Header.xlsx", "45540_classic_Footer.xlsx", "45540_form_Footer.xlsx", @@ -152,15 +161,17 @@ public final class TestXSSFExcelExtractor extends TestCase { String text = extractor.getText(); assertTrue("Unable to find expected word in text from " + sampleName + "\n" + text, text.contains("testdoc")); - assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + + extractor.close(); } } /** * From bug #45544 + * @throws IOException */ - public void testComments() { - + public void testComments() throws IOException { XSSFExcelExtractor extractor = getExtractor("45544.xlsx"); String text = extractor.getText(); @@ -173,9 +184,11 @@ public final class TestXSSFExcelExtractor extends TestCase { text = extractor.getText(); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + + extractor.close(); } - public void testInlineStrings() { + public void testInlineStrings() throws IOException { XSSFExcelExtractor extractor = getExtractor("InlineStrings.xlsx"); extractor.setFormulasNotResults(true); String text = extractor.getText(); @@ -195,5 +208,7 @@ public final class TestXSSFExcelExtractor extends TestCase { // Formulas assertTrue("Unable to find expected word in text\n" + text, text.contains("A2")); assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2")); + + extractor.close(); } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java new file mode 100644 index 000000000..fd5cde38e --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java @@ -0,0 +1,37 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.extractor; + +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hssf.HSSFTestDataSamples; + +/** + * Tests for {@link XSSFExcelExtractor} + */ +public final class TestXSSFExcelExtractorUsingFactory extends TestXSSFExcelExtractor { + @Override + protected final XSSFExcelExtractor getExtractor(String sampleName) { + ExtractorFactory.setAllThreadsPreferEventExtractors(false); + ExtractorFactory.setThreadPrefersEventExtractors(false); + try { + return (XSSFExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index 34d20aae7..f62749f52 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -57,6 +57,8 @@ public class TestXWPFWordExtractor extends TestCase { } } assertEquals(3, ps); + + extractor.close(); } /** @@ -93,6 +95,8 @@ public class TestXWPFWordExtractor extends TestCase { } } assertEquals(134, ps); + + extractor.close(); } public void testGetWithHyperlinks() throws IOException { @@ -118,6 +122,8 @@ public class TestXWPFWordExtractor extends TestCase { "We have a hyperlink here, and another.\n", extractor.getText() ); + + extractor.close(); } public void testHeadersFooters() throws IOException { @@ -141,7 +147,11 @@ public class TestXWPFWordExtractor extends TestCase { // Now another file, expect multiple headers // and multiple footers doc = XWPFTestDataSamples.openSampleDocument("DiffFirstPageHeadFoot.docx"); + extractor.close(); + extractor = new XWPFWordExtractor(doc); + extractor.close(); + extractor = new XWPFWordExtractor(doc); extractor.getText(); @@ -161,6 +171,8 @@ public class TestXWPFWordExtractor extends TestCase { "Footer Left\tFooter Middle\tFooter Right\n", extractor.getText() ); + + extractor.close(); } public void testFootnotes() throws IOException { @@ -169,6 +181,8 @@ public class TestXWPFWordExtractor extends TestCase { String text = extractor.getText(); assertTrue(text.contains("snoska")); assertTrue(text.contains("Eto ochen prostoy[footnoteRef:1] text so snoskoy")); + + extractor.close(); } @@ -177,6 +191,8 @@ public class TestXWPFWordExtractor extends TestCase { XWPFWordExtractor extractor = new XWPFWordExtractor(doc); assertTrue(extractor.getText().contains("snoska")); + + extractor.close(); } public void testFormFootnotes() throws IOException { @@ -186,6 +202,8 @@ public class TestXWPFWordExtractor extends TestCase { String text = extractor.getText(); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + + extractor.close(); } public void testEndnotes() throws IOException { @@ -194,6 +212,8 @@ public class TestXWPFWordExtractor extends TestCase { String text = extractor.getText(); assertTrue(text.contains("XXX")); assertTrue(text.contains("tilaka [endnoteRef:2]or 'tika'")); + + extractor.close(); } public void testInsertedDeletedText() throws IOException { @@ -202,6 +222,8 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(extractor.getText().contains("pendant worn")); assertTrue(extractor.getText().contains("extremely well")); + + extractor.close(); } public void testParagraphHeader() throws IOException { @@ -211,6 +233,8 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(extractor.getText().contains("Section 1")); assertTrue(extractor.getText().contains("Section 2")); assertTrue(extractor.getText().contains("Section 3")); + + extractor.close(); } /** @@ -225,6 +249,8 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(extractor.getText().contains("2004")); assertTrue(extractor.getText().contains("2008")); assertTrue(extractor.getText().contains("(120 ")); + + extractor.close(); } /** @@ -244,6 +270,8 @@ public class TestXWPFWordExtractor extends TestCase { // Now check the first paragraph in total assertTrue(extractor.getText().contains("a\tb\n")); + + extractor.close(); } /** @@ -258,6 +286,8 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(text.length() > 0); assertFalse(text.contains("AUTHOR")); assertFalse(text.contains("CREATEDATE")); + + extractor.close(); } /** @@ -271,6 +301,8 @@ public class TestXWPFWordExtractor extends TestCase { String text = extractor.getText(); assertTrue(text.length() > 0); assertTrue(text.contains("FldSimple.docx")); + + extractor.close(); } /** @@ -282,5 +314,7 @@ public class TestXWPFWordExtractor extends TestCase { XWPFWordExtractor extractor = new XWPFWordExtractor(doc); String text = extractor.getText(); assertTrue(text.length() > 0); + + extractor.close(); } } diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index 00b3afad8..de8221029 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -239,7 +239,7 @@ public final class TestExcelExtractor extends TestCase { ) > -1 ); - assertTrue( + assertTrue("Had: " + text + ", but should contain 'nn.nn\\t10.52\\n'", text.indexOf( "nn.nn\t10.52\n" ) > -1