diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index dc25f63a3..a313f3602 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -94,18 +94,27 @@ public class ExtractorFactory { if(core.size() != 1) { throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); } - - PackagePart corePart = pkg.getPart(core.getRelationship(0)); - if(corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType())) { - return new XSSFExcelExtractor(pkg); - } - if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType())) { + + PackagePart corePart = pkg.getPart(core.getRelationship(0)); + if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) || + corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) || + corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) || + corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) || + corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) { + return new XSSFExcelExtractor(pkg); + } + + if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) || + corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) || + corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) || + corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) { return new XWPFWordExtractor(pkg); } + if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) { return new XSLFPowerPointExtractor(pkg); } - throw new IllegalArgumentException("No supported documents found in the OOXML package"); + throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")"); } public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException { diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index bbbbaa973..76daeed76 100644 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -50,9 +50,11 @@ public class TestExtractorFactory extends TestCase { private File xls; private File xlsx; - + private File xltx; + private File doc; private File docx; + private File dotx; private File ppt; private File pptx; @@ -77,10 +79,12 @@ public class TestExtractorFactory extends TestCase { xls = new File(excel_dir, "SampleSS.xls"); xlsx = new File(excel_dir, "SampleSS.xlsx"); - + xltx = new File(excel_dir, "test.xltx"); + doc = new File(word_dir, "SampleDoc.doc"); docx = new File(word_dir, "SampleDoc.docx"); - + dotx = new File(word_dir, "test.dotx"); + ppt = new File(powerpoint_dir, "SampleShow.ppt"); pptx = new File(powerpoint_dir, "SampleShow.pptx"); @@ -104,6 +108,15 @@ public class TestExtractorFactory extends TestCase { assertTrue( ExtractorFactory.createExtractor(xlsx).getText().length() > 200 ); + + assertTrue( + ExtractorFactory.createExtractor(xltx) + instanceof XSSFExcelExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(xltx).getText().contains("test") + ); + // Word assertTrue( @@ -121,7 +134,15 @@ public class TestExtractorFactory extends TestCase { assertTrue( ExtractorFactory.createExtractor(docx).getText().length() > 120 ); - + + assertTrue( + ExtractorFactory.createExtractor(dotx) + instanceof XWPFWordExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(dotx).getText().contains("Test") + ); + // PowerPoint assertTrue( ExtractorFactory.createExtractor(ppt) diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx new file mode 100755 index 000000000..0b74e3932 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx differ diff --git a/src/testcases/org/apache/poi/hssf/data/test.xltx b/src/testcases/org/apache/poi/hssf/data/test.xltx new file mode 100755 index 000000000..3974eb2f4 Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/test.xltx differ