diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 6231065e7..9348c0593 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -234,6 +234,8 @@ public class TestAllFiles { EXPECTED_FAILURES.add("spreadsheet/Simple.xlsb"); EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted EXPECTED_FAILURES.add("poifs/only-zero-byte-streams.ole2"); // No actual contents + EXPECTED_FAILURES.add("spreadsheet/poc-xmlbomb.xlsx"); // contains xml-entity-expansion + EXPECTED_FAILURES.add("spreadsheet/poc-shared-strings.xlsx"); // contains shared-string-entity-expansion // old Excel files, which we only support simple text extraction of EXPECTED_FAILURES.add("spreadsheet/testEXCEL_2.xls"); diff --git a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java index f12bbd2de..64e9805cb 100644 --- a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java +++ b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java @@ -69,12 +69,17 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler { } private void readContent(Workbook wb) { - for(int i = 0;i < wb.getNumberOfSheets();i++) { + for(int i = 0;i < wb.getNumberOfSheets();i++) { Sheet sheet = wb.getSheetAt(i); assertNotNull(wb.getSheet(sheet.getSheetName())); sheet.groupColumn((short) 4, (short) 5); sheet.setColumnGroupCollapsed(4, true); sheet.setColumnGroupCollapsed(4, false); + + // don't do this for very large sheets as it will take a long time + if(sheet.getPhysicalNumberOfRows() > 1000) { + continue; + } for(Row row : sheet) { for(Cell cell : row) { diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java index f8c9afe97..74f5ea93e 100644 --- 
a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestZipPackage.java @@ -17,12 +17,29 @@ package org.apache.poi.openxml4j.opc; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import org.apache.poi.POITextExtractor; +import org.apache.poi.POIXMLException; +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.usermodel.WorkbookFactory; +import org.apache.poi.xssf.XSSFTestDataSamples; import org.apache.poi.xwpf.usermodel.XWPFRelation; +import org.apache.xmlbeans.XmlException; import org.junit.Test; public class TestZipPackage { @@ -51,4 +68,98 @@ public class TestZipPackage { assertFalse("Document should not be found in " + p.getParts(), foundDocument); assertFalse("Theme1 should not found in " + p.getParts(), foundTheme1); } + /** Opening poc-xmlbomb.xlsx as a sample workbook must throw a POIXMLException that reports the entity limit; a successful open fails the test. */ + @Test + public void testZipEntityExpansionTerminates() throws IOException { + try { + Workbook wb = XSSFTestDataSamples.openSampleWorkbook("poc-xmlbomb.xlsx"); + wb.close(); + fail("Should catch exception due to entity expansion limitations"); + } catch (POIXMLException e) { + assertEntityLimitReached(e); + } + } + /** Prints the full stack trace of {@code e} into a UTF-8 string and asserts that it contains "Exceeded Entity dereference bytes limit". */ + private void assertEntityLimitReached(Exception e) throws UnsupportedEncodingException { + ByteArrayOutputStream str = new ByteArrayOutputStream(); + PrintWriter writer = new PrintWriter(new OutputStreamWriter(str, "UTF-8")); + try { + e.printStackTrace(writer); + } finally
{ + writer.close(); + } + String string = new String(str.toByteArray(), "UTF-8"); + assertTrue("Had: " + string, string.contains("Exceeded Entity dereference bytes limit")); + } + /** Repeats the poc-xmlbomb.xlsx check via WorkbookFactory on an opened package, then via a text extractor obtained from ExtractorFactory. */ + @Test + public void testZipEntityExpansionExceedsMemory() throws Exception { + try { + Workbook wb = WorkbookFactory.create(XSSFTestDataSamples.openSamplePackage("poc-xmlbomb.xlsx")); + wb.close(); + fail("Should catch exception due to entity expansion limitations"); + } catch (POIXMLException e) { + assertEntityLimitReached(e); + } + /* Extractor path: an IllegalStateException from getText() is tolerated; a POIXMLException must report the entity limit. */ + try { + POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-xmlbomb.xlsx")); + try { + assertNotNull(extractor); + + try { + extractor.getText(); + } catch (IllegalStateException e) { + // expected due to shared strings expansion + } + } finally { + extractor.close(); + } + } catch (POIXMLException e) { + assertEntityLimitReached(e); + } + } + /** poc-shared-strings.xlsx must open and close as a workbook without an exception; text extraction may throw IllegalStateException from getText(), which is tolerated. */ + @Test + public void testZipEntityExpansionSharedStringTable() throws Exception { + Workbook wb = WorkbookFactory.create(XSSFTestDataSamples.openSamplePackage("poc-shared-strings.xlsx")); + wb.close(); + + POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx")); + try { + assertNotNull(extractor); + + try { + extractor.getText(); + } catch (IllegalStateException e) { + // expected due to shared strings expansion + } + } finally { + extractor.close(); + } + } + /** Same extraction of poc-shared-strings.xlsx with the thread preference for event extractors switched on; the previous preference is restored in the finally block, and an XmlException must report the entity limit. */ + @Test + public void testZipEntityExpansionSharedStringTableEvents() throws Exception { + boolean before = ExtractorFactory.getThreadPrefersEventExtractors(); + ExtractorFactory.setThreadPrefersEventExtractors(true); + try { + POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx")); + try { + assertNotNull(extractor); + + try { + extractor.getText(); + } catch (IllegalStateException e) { + // expected due to shared strings expansion + } + } finally { + extractor.close(); + }
+ } catch (XmlException e) { + assertEntityLimitReached(e); + } finally { + ExtractorFactory.setThreadPrefersEventExtractors(before); + } + } } diff --git a/test-data/spreadsheet/poc-shared-strings.xlsx b/test-data/spreadsheet/poc-shared-strings.xlsx new file mode 100644 index 000000000..e64abc612 Binary files /dev/null and b/test-data/spreadsheet/poc-shared-strings.xlsx differ diff --git a/test-data/spreadsheet/poc-xmlbomb.xlsx b/test-data/spreadsheet/poc-xmlbomb.xlsx new file mode 100644 index 000000000..f194ab01f Binary files /dev/null and b/test-data/spreadsheet/poc-xmlbomb.xlsx differ