Fixed ExtractorFactory to support .xltx and .dotx files; see Bugzilla 47517
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795327 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0662f7e862
commit
3304fa1887
@ -94,18 +94,27 @@ public class ExtractorFactory {
|
||||
if(core.size() != 1) {
|
||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||
}
|
||||
|
||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||
if(corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType())) {
|
||||
return new XSSFExcelExtractor(pkg);
|
||||
}
|
||||
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType())) {
|
||||
|
||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||
if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) {
|
||||
return new XSSFExcelExtractor(pkg);
|
||||
}
|
||||
|
||||
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) {
|
||||
return new XWPFWordExtractor(pkg);
|
||||
}
|
||||
|
||||
if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
|
||||
return new XSLFPowerPointExtractor(pkg);
|
||||
}
|
||||
throw new IllegalArgumentException("No supported documents found in the OOXML package");
|
||||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
|
||||
}
|
||||
|
||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
||||
|
@ -50,9 +50,11 @@ public class TestExtractorFactory extends TestCase {
|
||||
|
||||
private File xls;
|
||||
private File xlsx;
|
||||
|
||||
private File xltx;
|
||||
|
||||
private File doc;
|
||||
private File docx;
|
||||
private File dotx;
|
||||
|
||||
private File ppt;
|
||||
private File pptx;
|
||||
@ -77,10 +79,12 @@ public class TestExtractorFactory extends TestCase {
|
||||
|
||||
xls = new File(excel_dir, "SampleSS.xls");
|
||||
xlsx = new File(excel_dir, "SampleSS.xlsx");
|
||||
|
||||
xltx = new File(excel_dir, "test.xltx");
|
||||
|
||||
doc = new File(word_dir, "SampleDoc.doc");
|
||||
docx = new File(word_dir, "SampleDoc.docx");
|
||||
|
||||
dotx = new File(word_dir, "test.dotx");
|
||||
|
||||
ppt = new File(powerpoint_dir, "SampleShow.ppt");
|
||||
pptx = new File(powerpoint_dir, "SampleShow.pptx");
|
||||
|
||||
@ -104,6 +108,15 @@ public class TestExtractorFactory extends TestCase {
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(xlsx).getText().length() > 200
|
||||
);
|
||||
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(xltx)
|
||||
instanceof XSSFExcelExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(xltx).getText().contains("test")
|
||||
);
|
||||
|
||||
|
||||
// Word
|
||||
assertTrue(
|
||||
@ -121,7 +134,15 @@ public class TestExtractorFactory extends TestCase {
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(docx).getText().length() > 120
|
||||
);
|
||||
|
||||
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(dotx)
|
||||
instanceof XWPFWordExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(dotx).getText().contains("Test")
|
||||
);
|
||||
|
||||
// PowerPoint
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(ppt)
|
||||
|
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/test.dotx
Executable file
Binary file not shown.
BIN
src/testcases/org/apache/poi/hssf/data/test.xltx
Executable file
BIN
src/testcases/org/apache/poi/hssf/data/test.xltx
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user