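More ExtractorFactory support and tests: detect Visio ("VisioDocument") OLE2 streams and return a VisioTextExtractor, drop a leftover debug System.err.println, and add tests for the File, InputStream and POIFSFileSystem entry points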
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@645872 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fdc07f33f8
commit
8c4372edce
@ -32,6 +32,7 @@ import org.openxml4j.opc.PackageRelationshipCollection;
|
|||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.POIXMLDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
|
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||||
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
||||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||||
@ -109,7 +110,6 @@ public class ExtractorFactory {
|
|||||||
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
|
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
|
||||||
Entry entry = (Entry)entries.next();
|
Entry entry = (Entry)entries.next();
|
||||||
|
|
||||||
System.err.println(entry.getName());
|
|
||||||
if(entry.getName().equals("Workbook")) {
|
if(entry.getName().equals("Workbook")) {
|
||||||
return new ExcelExtractor(fs);
|
return new ExcelExtractor(fs);
|
||||||
}
|
}
|
||||||
@ -119,7 +119,9 @@ public class ExtractorFactory {
|
|||||||
if(entry.getName().equals("PowerPoint Document")) {
|
if(entry.getName().equals("PowerPoint Document")) {
|
||||||
return new PowerPointExtractor(fs);
|
return new PowerPointExtractor(fs);
|
||||||
}
|
}
|
||||||
// TODO - visio
|
if(entry.getName().equals("VisioDocument")) {
|
||||||
|
return new VisioTextExtractor(fs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
|
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
|
||||||
}
|
}
|
||||||
|
@ -17,10 +17,14 @@
|
|||||||
package org.apache.poi.extractor;
|
package org.apache.poi.extractor;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||||
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
||||||
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
import org.apache.poi.hssf.extractor.ExcelExtractor;
|
||||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||||
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||||
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
||||||
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
||||||
@ -34,6 +38,7 @@ public class TestExtractorFactory extends TestCase {
|
|||||||
private String excel_dir;
|
private String excel_dir;
|
||||||
private String word_dir;
|
private String word_dir;
|
||||||
private String powerpoint_dir;
|
private String powerpoint_dir;
|
||||||
|
private String visio_dir;
|
||||||
|
|
||||||
private File txt;
|
private File txt;
|
||||||
|
|
||||||
@ -45,6 +50,8 @@ public class TestExtractorFactory extends TestCase {
|
|||||||
|
|
||||||
private File ppt;
|
private File ppt;
|
||||||
private File pptx;
|
private File pptx;
|
||||||
|
|
||||||
|
private File vsd;
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
@ -52,8 +59,9 @@ public class TestExtractorFactory extends TestCase {
|
|||||||
excel_dir = System.getProperty("HSSF.testdata.path");
|
excel_dir = System.getProperty("HSSF.testdata.path");
|
||||||
word_dir = System.getProperty("HWPF.testdata.path");
|
word_dir = System.getProperty("HWPF.testdata.path");
|
||||||
powerpoint_dir = System.getProperty("HSLF.testdata.path");
|
powerpoint_dir = System.getProperty("HSLF.testdata.path");
|
||||||
|
visio_dir = System.getProperty("HDGF.testdata.path");
|
||||||
|
|
||||||
txt = new File(excel_dir, "SampleSS.txt");
|
txt = new File(powerpoint_dir, "SampleShow.txt");
|
||||||
|
|
||||||
xls = new File(excel_dir, "SampleSS.xls");
|
xls = new File(excel_dir, "SampleSS.xls");
|
||||||
xlsx = new File(excel_dir, "SampleSS.xlsx");
|
xlsx = new File(excel_dir, "SampleSS.xlsx");
|
||||||
@ -63,6 +71,8 @@ public class TestExtractorFactory extends TestCase {
|
|||||||
|
|
||||||
ppt = new File(powerpoint_dir, "SampleShow.ppt");
|
ppt = new File(powerpoint_dir, "SampleShow.ppt");
|
||||||
pptx = new File(powerpoint_dir, "SampleShow.pptx");
|
pptx = new File(powerpoint_dir, "SampleShow.pptx");
|
||||||
|
|
||||||
|
vsd = new File(visio_dir, "Test_Visio-Some_Random_Text.vsd");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFile() throws Exception {
|
public void testFile() throws Exception {
|
||||||
@ -118,7 +128,13 @@ public class TestExtractorFactory extends TestCase {
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Visio
|
// Visio
|
||||||
// TODO
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(vsd)
|
||||||
|
instanceof VisioTextExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(vsd).getText().length() > 50
|
||||||
|
);
|
||||||
|
|
||||||
// Text
|
// Text
|
||||||
try {
|
try {
|
||||||
@ -128,12 +144,123 @@ public class TestExtractorFactory extends TestCase {
|
|||||||
// Good
|
// Good
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testInputStream() throws Exception {
|
public void testInputStream() throws Exception {
|
||||||
|
// Excel
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(xls))
|
||||||
|
instanceof ExcelExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200
|
||||||
|
);
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(xlsx))
|
||||||
|
instanceof XSSFExcelExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
|
||||||
|
);
|
||||||
|
|
||||||
|
// Word
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(doc))
|
||||||
|
instanceof WordExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120
|
||||||
|
);
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(docx))
|
||||||
|
instanceof XWPFWordExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120
|
||||||
|
);
|
||||||
|
|
||||||
|
// PowerPoint
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(ppt))
|
||||||
|
instanceof PowerPointExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120
|
||||||
|
);
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(pptx))
|
||||||
|
instanceof XSLFPowerPointExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120
|
||||||
|
);
|
||||||
|
|
||||||
|
// Visio
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(vsd))
|
||||||
|
instanceof VisioTextExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
|
||||||
|
);
|
||||||
|
|
||||||
|
// Text
|
||||||
|
try {
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(txt));
|
||||||
|
fail();
|
||||||
|
} catch(IllegalArgumentException e) {
|
||||||
|
// Good
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPOIFS() throws Exception {
|
public void testPOIFS() throws Exception {
|
||||||
|
// Excel
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
|
||||||
|
instanceof ExcelExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
|
||||||
|
);
|
||||||
|
|
||||||
|
// Word
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
|
||||||
|
instanceof WordExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
|
||||||
|
);
|
||||||
|
|
||||||
|
// PowerPoint
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
|
||||||
|
instanceof PowerPointExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
|
||||||
|
);
|
||||||
|
|
||||||
|
// Visio
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
|
||||||
|
instanceof VisioTextExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
|
||||||
|
);
|
||||||
|
|
||||||
|
// Text
|
||||||
|
try {
|
||||||
|
ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
|
||||||
|
fail();
|
||||||
|
} catch(IOException e) {
|
||||||
|
// Good
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPackage() throws Exception {
|
public void testPackage() throws Exception {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user