Adjust test for extracting some more

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1722411 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2015-12-30 20:31:50 +00:00
parent 8589924b22
commit 902830cf14

View File

@ -325,20 +325,23 @@ public class TestExtractorFactory {
@Test @Test
public void testInputStream() throws Exception { public void testInputStream() throws Exception {
// Excel // Excel
POITextExtractor extractor = ExtractorFactory.createExtractor(new FileInputStream(xls));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(xls)) extractor
instanceof ExcelExtractor instanceof ExcelExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200 extractor.getText().length() > 200
); );
extractor.close();
extractor = ExtractorFactory.createExtractor(new FileInputStream(xlsx));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(xlsx)) extractor
instanceof XSSFExcelExtractor instanceof XSSFExcelExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200 extractor.getText().length() > 200
); );
// TODO Support OOXML-Strict, see bug #57699 // TODO Support OOXML-Strict, see bug #57699
// assertTrue( // assertTrue(
@ -348,92 +351,113 @@ public class TestExtractorFactory {
// assertTrue( // assertTrue(
// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict)).getText().length() > 200 // ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict)).getText().length() > 200
// ); // );
extractor.close();
// Word // Word
extractor = ExtractorFactory.createExtractor(new FileInputStream(doc));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(doc)) extractor
instanceof WordExtractor instanceof WordExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120 extractor.getText().length() > 120
); );
extractor.close();
extractor = ExtractorFactory.createExtractor(new FileInputStream(doc6));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(doc6)) extractor
instanceof Word6Extractor instanceof Word6Extractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(doc6)).getText().length() > 20 extractor.getText().length() > 20
); );
extractor.close();
extractor = ExtractorFactory.createExtractor(new FileInputStream(doc95));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(doc95)) extractor
instanceof Word6Extractor instanceof Word6Extractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(doc95)).getText().length() > 120 extractor.getText().length() > 120
); );
extractor.close();
extractor = ExtractorFactory.createExtractor(new FileInputStream(docx));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(docx)) extractor
instanceof XWPFWordExtractor instanceof XWPFWordExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120 extractor.getText().length() > 120
); );
extractor.close();
// PowerPoint // PowerPoint
extractor = ExtractorFactory.createExtractor(new FileInputStream(ppt));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(ppt)) extractor
instanceof PowerPointExtractor instanceof PowerPointExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120 extractor.getText().length() > 120
); );
extractor.close();
extractor = ExtractorFactory.createExtractor(new FileInputStream(pptx));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(pptx)) extractor
instanceof XSLFPowerPointExtractor instanceof XSLFPowerPointExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120 extractor.getText().length() > 120
); );
extractor.close();
// Visio // Visio
extractor = ExtractorFactory.createExtractor(new FileInputStream(vsd));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(vsd)) extractor
instanceof VisioTextExtractor instanceof VisioTextExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50 extractor.getText().length() > 50
); );
extractor.close();
// Visio - vsdx // Visio - vsdx
extractor = ExtractorFactory.createExtractor(new FileInputStream(vsdx));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(vsdx)) extractor
instanceof XDGFVisioExtractor instanceof XDGFVisioExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(vsdx)).getText().length() > 20 extractor.getText().length() > 20
); );
extractor.close();
// Publisher // Publisher
extractor = ExtractorFactory.createExtractor(new FileInputStream(pub));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(pub)) extractor
instanceof PublisherTextExtractor instanceof PublisherTextExtractor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(pub)).getText().length() > 50 extractor.getText().length() > 50
); );
extractor.close();
// Outlook msg // Outlook msg
extractor = ExtractorFactory.createExtractor(new FileInputStream(msg));
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(msg)) extractor
instanceof OutlookTextExtactor instanceof OutlookTextExtactor
); );
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(msg)).getText().length() > 50 extractor.getText().length() > 50
); );
extractor.close();
// Text // Text
try { try {