Fixed ExtractorFactory to support .xltx and .dotx files; see Bugzilla 47517

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795327 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2009-07-18 09:09:11 +00:00
parent 0662f7e862
commit 3304fa1887
4 changed files with 41 additions and 11 deletions

View File

@ -95,17 +95,26 @@ public class ExtractorFactory {
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
}
PackagePart corePart = pkg.getPart(core.getRelationship(0));
if(corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType())) {
return new XSSFExcelExtractor(pkg);
}
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType())) {
PackagePart corePart = pkg.getPart(core.getRelationship(0));
if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) ||
corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) ||
corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) ||
corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) ||
corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) {
return new XSSFExcelExtractor(pkg);
}
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) ||
corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) ||
corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) ||
corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) {
return new XWPFWordExtractor(pkg);
}
if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
return new XSLFPowerPointExtractor(pkg);
}
throw new IllegalArgumentException("No supported documents found in the OOXML package");
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
}
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {

View File

@ -50,9 +50,11 @@ public class TestExtractorFactory extends TestCase {
private File xls;
private File xlsx;
private File xltx;
private File doc;
private File docx;
private File dotx;
private File ppt;
private File pptx;
@ -77,9 +79,11 @@ public class TestExtractorFactory extends TestCase {
xls = new File(excel_dir, "SampleSS.xls");
xlsx = new File(excel_dir, "SampleSS.xlsx");
xltx = new File(excel_dir, "test.xltx");
doc = new File(word_dir, "SampleDoc.doc");
docx = new File(word_dir, "SampleDoc.docx");
dotx = new File(word_dir, "test.dotx");
ppt = new File(powerpoint_dir, "SampleShow.ppt");
pptx = new File(powerpoint_dir, "SampleShow.pptx");
@ -105,6 +109,15 @@ public class TestExtractorFactory extends TestCase {
ExtractorFactory.createExtractor(xlsx).getText().length() > 200
);
assertTrue(
ExtractorFactory.createExtractor(xltx)
instanceof XSSFExcelExtractor
);
assertTrue(
ExtractorFactory.createExtractor(xltx).getText().contains("test")
);
// Word
assertTrue(
ExtractorFactory.createExtractor(doc)
@ -122,6 +135,14 @@ public class TestExtractorFactory extends TestCase {
ExtractorFactory.createExtractor(docx).getText().length() > 120
);
assertTrue(
ExtractorFactory.createExtractor(dotx)
instanceof XWPFWordExtractor
);
assertTrue(
ExtractorFactory.createExtractor(dotx).getText().contains("Test")
);
// PowerPoint
assertTrue(
ExtractorFactory.createExtractor(ppt)

Binary file not shown.

Binary file not shown.