Bug 54982: Add a close() interface to POITextExtractor which can be used to free resources later.

Implement close() where necessary so resources are closed. Add close()
to tests and run existing unit tests also against the Extractor that is
built via the Factory. Also add a small test-suite to quickly execute
all extractor-related tests.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1493669 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2013-06-17 07:53:59 +00:00
parent bd1214db1e
commit eee1b1af23
11 changed files with 202 additions and 22 deletions

View File

@ -16,6 +16,9 @@
==================================================================== */
package org.apache.poi;
import java.io.Closeable;
import java.io.IOException;
/**
* Common Parent for Text Extractors
* of POI Documents.
@ -27,7 +30,7 @@ package org.apache.poi;
* @see org.apache.poi.hdgf.extractor.VisioTextExtractor
* @see org.apache.poi.hwpf.extractor.WordExtractor
*/
public abstract class POITextExtractor {
public abstract class POITextExtractor implements Closeable {
/** The POIDocument that's open */
protected POIDocument document;
@ -61,4 +64,15 @@ public abstract class POITextExtractor {
* metadata / properties, such as author and title.
*/
public abstract POITextExtractor getMetadataTextExtractor();
/**
* Releases the resources held by this extractor as soon as it is
* no longer needed. Depending on the concrete extractor this may
* include closing open file handles and freeing memory.
*
* The extractor must not be used after close has been called.
*
* @throws IOException declared so that derived classes can report a
*         failure while releasing their resources; this base
*         implementation itself performs no work
*/
public void close() throws IOException {
// nothing to do in abstract class, derived classes may perform actions.
}
}

View File

@ -17,6 +17,8 @@
package org.apache.poi;
import java.io.IOException;
import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.POIXMLProperties.CustomProperties;
import org.apache.poi.POIXMLProperties.ExtendedProperties;
@ -75,4 +77,16 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
return new POIXMLPropertiesTextExtractor(_document);
}
/**
 * Releases the OPC package behind the wrapped document, if there is one,
 * and then runs the close handling of the parent class.
 * <p>
 * The package is reverted instead of closed: a text extractor only reads
 * from the document, whereas closing a package that was opened read-write
 * saves it, which could rewrite the source file as a side effect of
 * extracting text.
 *
 * @throws IOException if the close handling of the parent class fails
 */
@Override
public void close() throws IOException {
    try {
        // e.g. XSSFEventBaseExcelExtractor passes a null-document
        if (_document != null) {
            OPCPackage pkg = _document.getPackage();
            if (pkg != null) {
                // release the package without writing anything back
                pkg.revert();
            }
        }
    } finally {
        // always give the parent a chance to clean up, even if
        // releasing the package failed unexpectedly
        super.close();
    }
}
}

View File

@ -191,6 +191,15 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
}
}
/**
 * Closes the package container held by this extractor, when one is set,
 * drops the reference to it and finally delegates to the close handling
 * of the parent class.
 *
 * @throws IOException if closing the container or the parent fails
 */
@Override
public void close() throws IOException {
    // no container held: only the parent needs to be closed
    if (container == null) {
        super.close();
        return;
    }

    container.close();
    // forget the container so that it is not closed a second time
    container = null;

    super.close();
}
protected class SheetTextExtractor implements SheetContentsHandler {
private final StringBuffer output;
private boolean firstCellOfRow = true;

View File

@ -46,6 +46,9 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
textExt.close();
ext.close();
}
public void testCore() throws Exception {
@ -63,6 +66,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
ext.close();
}
public void testExtended() throws Exception {
@ -82,6 +87,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
assertTrue(text.contains("Company = Mera"));
assertTrue(eText.contains("Application = Microsoft Excel"));
assertTrue(eText.contains("Company = Mera"));
ext.close();
}
public void testCustom() throws Exception {
@ -99,6 +106,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
assertTrue(text.contains("description = another value"));
assertTrue(cText.contains("description = another value"));
ext.close();
}
/**
@ -118,5 +127,7 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
assertFalse(text.contains("Created =")); // With date is null
assertTrue(text.contains("CreatedString = ")); // Via string is blank
assertTrue(text.contains("LastModifiedBy = IT Client Services"));
ext.close();
}
}

View File

@ -43,8 +43,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
* Get text out of the simple file
*/
public void testGetSimpleText() throws Exception {
new XSLFPowerPointExtractor(xmlA);
new XSLFPowerPointExtractor(pkg);
new XSLFPowerPointExtractor(xmlA).close();
new XSLFPowerPointExtractor(pkg).close();
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA);
@ -148,6 +148,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
assertEquals(
"\n\n\n\n", text
);
extractor.close();
}
public void testGetComments() throws Exception {
@ -165,6 +167,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
// Check the authors came through too
assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
extractor.close();
}
public void testGetMasterText() throws Exception {
@ -206,6 +210,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"This text comes from the Master Slide\n"
, text
);
extractor.close();
}
public void testTable() throws Exception {
@ -219,6 +225,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
// Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
extractor.close();
}
/**
@ -267,6 +275,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"Text missing for " + filename + "\n" + text,
text.contains("Mystery")
);
extractor.close();
}
}
}

View File

@ -30,10 +30,8 @@ import org.apache.poi.xssf.XSSFTestDataSamples;
/**
* Tests for {@link XSSFEventBasedExcelExtractor}
*/
public final class TestXSSFEventBasedExcelExtractor extends TestCase {
private static final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
public class TestXSSFEventBasedExcelExtractor extends TestCase {
protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples.
openSamplePackage(sampleName));
}
@ -97,6 +95,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
CHUNK2 +
"Sheet3\n"
, text);
extractor.close();
}
public void testGetComplexText() throws Exception {
@ -112,6 +112,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
"Avgtxfull\n" +
"(iii) AVERAGE TAX RATES ON ANNUAL"
));
extractor.close();
}
public void testInlineStrings() throws Exception {
@ -134,6 +136,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
// Formulas
assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
extractor.close();
}
/**
@ -159,5 +163,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
Matcher m = pattern.matcher(text);
assertTrue(m.matches());
}
ole2Extractor.close();
ooxmlExtractor.close();
}
}

View File

@ -0,0 +1,29 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.extractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples;
/**
 * Runs the tests inherited from {@link TestXSSFEventBasedExcelExtractor}
 * against an extractor that is created via {@link ExtractorFactory}
 * instead of being constructed directly.
 */
public class TestXSSFEventBasedExcelExtractorUsingFactory extends TestXSSFEventBasedExcelExtractor {
    /**
     * Creates the extractor for the given sample file through the factory,
     * with event based extractors preferred while it is being created.
     *
     * @param sampleName name of the sample file to open
     * @return the extractor built by the factory
     * @throws Exception if the sample cannot be opened or no extractor can be created
     */
    @Override
    protected final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
        ExtractorFactory.setAllThreadsPreferEventExtractors(true);
        try {
            return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(
                    HSSFTestDataSamples.openSampleFileStream(sampleName));
        } finally {
            // the preference applies to all threads; clear it again so it
            // does not leak into tests that run after this one
            ExtractorFactory.setAllThreadsPreferEventExtractors(null);
        }
    }
}

View File

@ -17,6 +17,7 @@
package org.apache.poi.xssf.extractor;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -30,17 +31,16 @@ import org.apache.poi.xssf.XSSFTestDataSamples;
/**
* Tests for {@link XSSFExcelExtractor}
*/
public final class TestXSSFExcelExtractor extends TestCase {
private static final XSSFExcelExtractor getExtractor(String sampleName) {
public class TestXSSFExcelExtractor extends TestCase {
protected XSSFExcelExtractor getExtractor(String sampleName) {
return new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook(sampleName));
}
/**
* Get text out of the simple file
* @throws IOException
*/
public void testGetSimpleText() {
public void testGetSimpleText() throws IOException {
// a very simple file
XSSFExcelExtractor extractor = getExtractor("sample.xlsx");
extractor.getText();
@ -96,9 +96,11 @@ public final class TestXSSFExcelExtractor extends TestCase {
CHUNK2 +
"Sheet3\n"
, text);
extractor.close();
}
public void testGetComplexText() {
public void testGetComplexText() throws IOException {
// A fairly complex file
XSSFExcelExtractor extractor = getExtractor("AverageTaxRates.xlsx");
extractor.getText();
@ -112,14 +114,17 @@ public final class TestXSSFExcelExtractor extends TestCase {
"Avgtxfull\n" +
"null\t(iii) AVERAGE TAX RATES ON ANNUAL"
));
extractor.close();
}
/**
* Test that we return pretty much the same as
* ExcelExtractor does, when we're both passed
* the same file, just saved as xls and xlsx
* @throws IOException
*/
public void testComparedToOLE2() {
public void testComparedToOLE2() throws IOException {
// A fairly simple file - ooxml
XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx");
@ -137,12 +142,16 @@ public final class TestXSSFExcelExtractor extends TestCase {
Matcher m = pattern.matcher(text);
assertTrue(m.matches());
}
ole2Extractor.close();
ooxmlExtractor.close();
}
/**
* From bug #45540
* @throws IOException
*/
public void testHeaderFooter() {
public void testHeaderFooter() throws IOException {
String[] files = new String[] {
"45540_classic_Header.xlsx", "45540_form_Header.xlsx",
"45540_classic_Footer.xlsx", "45540_form_Footer.xlsx",
@ -153,14 +162,16 @@ public final class TestXSSFExcelExtractor extends TestCase {
assertTrue("Unable to find expected word in text from " + sampleName + "\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
extractor.close();
}
}
/**
* From bug #45544
* @throws IOException
*/
public void testComments() {
public void testComments() throws IOException {
XSSFExcelExtractor extractor = getExtractor("45544.xlsx");
String text = extractor.getText();
@ -173,9 +184,11 @@ public final class TestXSSFExcelExtractor extends TestCase {
text = extractor.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
extractor.close();
}
public void testInlineStrings() {
public void testInlineStrings() throws IOException {
XSSFExcelExtractor extractor = getExtractor("InlineStrings.xlsx");
extractor.setFormulasNotResults(true);
String text = extractor.getText();
@ -195,5 +208,7 @@ public final class TestXSSFExcelExtractor extends TestCase {
// Formulas
assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
extractor.close();
}
}

View File

@ -0,0 +1,37 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.extractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples;
/**
 * Runs the tests inherited from {@link TestXSSFExcelExtractor} against an
 * extractor that is created via {@link ExtractorFactory} instead of being
 * constructed directly.
 */
public final class TestXSSFExcelExtractorUsingFactory extends TestXSSFExcelExtractor {
    /**
     * Creates the extractor for the given sample file through the factory,
     * with event based extractors switched off while it is being created.
     *
     * @param sampleName name of the sample file to open
     * @return the extractor built by the factory
     * @throws RuntimeException wrapping any failure to open the sample or
     *         to create the extractor
     */
    @Override
    protected final XSSFExcelExtractor getExtractor(String sampleName) {
        ExtractorFactory.setAllThreadsPreferEventExtractors(false);
        ExtractorFactory.setThreadPrefersEventExtractors(false);
        try {
            return (XSSFExcelExtractor) ExtractorFactory.createExtractor(
                    HSSFTestDataSamples.openSampleFileStream(sampleName));
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // the all-threads preference overrides per-thread settings; clear
            // it again so it does not leak into tests that run after this one
            ExtractorFactory.setAllThreadsPreferEventExtractors(null);
        }
    }
}

View File

@ -57,6 +57,8 @@ public class TestXWPFWordExtractor extends TestCase {
}
}
assertEquals(3, ps);
extractor.close();
}
/**
@ -93,6 +95,8 @@ public class TestXWPFWordExtractor extends TestCase {
}
}
assertEquals(134, ps);
extractor.close();
}
public void testGetWithHyperlinks() throws IOException {
@ -118,6 +122,8 @@ public class TestXWPFWordExtractor extends TestCase {
"We have a hyperlink <http://poi.apache.org/> here, and another.\n",
extractor.getText()
);
extractor.close();
}
public void testHeadersFooters() throws IOException {
@ -141,7 +147,11 @@ public class TestXWPFWordExtractor extends TestCase {
// Now another file, expect multiple headers
// and multiple footers
doc = XWPFTestDataSamples.openSampleDocument("DiffFirstPageHeadFoot.docx");
extractor.close();
extractor = new XWPFWordExtractor(doc);
extractor.close();
extractor =
new XWPFWordExtractor(doc);
extractor.getText();
@ -161,6 +171,8 @@ public class TestXWPFWordExtractor extends TestCase {
"Footer Left\tFooter Middle\tFooter Right\n",
extractor.getText()
);
extractor.close();
}
public void testFootnotes() throws IOException {
@ -169,6 +181,8 @@ public class TestXWPFWordExtractor extends TestCase {
String text = extractor.getText();
assertTrue(text.contains("snoska"));
assertTrue(text.contains("Eto ochen prostoy[footnoteRef:1] text so snoskoy"));
extractor.close();
}
@ -177,6 +191,8 @@ public class TestXWPFWordExtractor extends TestCase {
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
assertTrue(extractor.getText().contains("snoska"));
extractor.close();
}
public void testFormFootnotes() throws IOException {
@ -186,6 +202,8 @@ public class TestXWPFWordExtractor extends TestCase {
String text = extractor.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
extractor.close();
}
public void testEndnotes() throws IOException {
@ -194,6 +212,8 @@ public class TestXWPFWordExtractor extends TestCase {
String text = extractor.getText();
assertTrue(text.contains("XXX"));
assertTrue(text.contains("tilaka [endnoteRef:2]or 'tika'"));
extractor.close();
}
public void testInsertedDeletedText() throws IOException {
@ -202,6 +222,8 @@ public class TestXWPFWordExtractor extends TestCase {
assertTrue(extractor.getText().contains("pendant worn"));
assertTrue(extractor.getText().contains("extremely well"));
extractor.close();
}
public void testParagraphHeader() throws IOException {
@ -211,6 +233,8 @@ public class TestXWPFWordExtractor extends TestCase {
assertTrue(extractor.getText().contains("Section 1"));
assertTrue(extractor.getText().contains("Section 2"));
assertTrue(extractor.getText().contains("Section 3"));
extractor.close();
}
/**
@ -225,6 +249,8 @@ public class TestXWPFWordExtractor extends TestCase {
assertTrue(extractor.getText().contains("2004"));
assertTrue(extractor.getText().contains("2008"));
assertTrue(extractor.getText().contains("(120 "));
extractor.close();
}
/**
@ -244,6 +270,8 @@ public class TestXWPFWordExtractor extends TestCase {
// Now check the first paragraph in total
assertTrue(extractor.getText().contains("a\tb\n"));
extractor.close();
}
/**
@ -258,6 +286,8 @@ public class TestXWPFWordExtractor extends TestCase {
assertTrue(text.length() > 0);
assertFalse(text.contains("AUTHOR"));
assertFalse(text.contains("CREATEDATE"));
extractor.close();
}
/**
@ -271,6 +301,8 @@ public class TestXWPFWordExtractor extends TestCase {
String text = extractor.getText();
assertTrue(text.length() > 0);
assertTrue(text.contains("FldSimple.docx"));
extractor.close();
}
/**
@ -282,5 +314,7 @@ public class TestXWPFWordExtractor extends TestCase {
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
String text = extractor.getText();
assertTrue(text.length() > 0);
extractor.close();
}
}

View File

@ -239,7 +239,7 @@ public final class TestExcelExtractor extends TestCase {
) > -1
);
assertTrue(
assertTrue("Had: " + text + ", but should contain 'nn.nn\\t10.52\\n'",
text.indexOf(
"nn.nn\t10.52\n"
) > -1