2014-11-29 19:16:23 -05:00
|
|
|
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */
|
|
|
|
|
|
|
|
package org.apache.poi.hssf.extractor;
|
|
|
|
|
2014-11-30 09:36:01 -05:00
|
|
|
import java.io.File;
|
2014-11-29 19:16:23 -05:00
|
|
|
import java.io.InputStream;
|
|
|
|
|
2014-11-30 09:36:01 -05:00
|
|
|
import org.apache.poi.POITestCase;
|
2014-11-29 19:16:23 -05:00
|
|
|
import org.apache.poi.hssf.HSSFTestDataSamples;
|
|
|
|
|
|
|
|
/**
|
2014-11-30 09:22:06 -05:00
|
|
|
* Unit tests for the Excel 5/95 and Excel 4 (and older) text
|
|
|
|
* extractor
|
2014-11-29 19:16:23 -05:00
|
|
|
*/
|
2014-11-30 09:36:01 -05:00
|
|
|
public final class TestOldExcelExtractor extends POITestCase {
|
2014-11-29 19:16:23 -05:00
|
|
|
private static OldExcelExtractor createExtractor(String sampleFileName) {
|
|
|
|
InputStream is = HSSFTestDataSamples.openSampleFileStream(sampleFileName);
|
|
|
|
|
|
|
|
try {
|
|
|
|
return new OldExcelExtractor(is);
|
|
|
|
} catch (Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
2014-11-30 09:36:01 -05:00
|
|
|
|
2014-11-30 09:22:06 -05:00
|
|
|
public void testSimpleExcel4() {
|
2014-11-29 19:16:23 -05:00
|
|
|
OldExcelExtractor extractor = createExtractor("testEXCEL_4.xls");
|
|
|
|
|
|
|
|
// Check we can call getText without error
|
|
|
|
String text = extractor.getText();
|
|
|
|
|
|
|
|
// Check we find a few words we expect in there
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "Size");
|
|
|
|
assertContains(text, "Returns");
|
2014-11-29 19:48:17 -05:00
|
|
|
|
|
|
|
// Check we find a few numbers we expect in there
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "11");
|
|
|
|
assertContains(text, "784");
|
2014-11-29 19:16:23 -05:00
|
|
|
}
|
2014-11-30 10:54:34 -05:00
|
|
|
public void testSimpleExcel5() {
|
2014-11-30 09:22:06 -05:00
|
|
|
for (String ver : new String[] {"5", "95"}) {
|
|
|
|
OldExcelExtractor extractor = createExtractor("testEXCEL_"+ver+".xls");
|
|
|
|
|
|
|
|
// Check we can call getText without error
|
|
|
|
String text = extractor.getText();
|
|
|
|
|
|
|
|
// Check we find a few words we expect in there
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "Sample Excel");
|
|
|
|
assertContains(text, "Written and saved");
|
2014-11-30 09:22:06 -05:00
|
|
|
|
|
|
|
// Check we find a few numbers we expect in there
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "15");
|
|
|
|
assertContains(text, "169");
|
2014-11-30 09:22:06 -05:00
|
|
|
}
|
|
|
|
}
|
2014-11-29 19:16:23 -05:00
|
|
|
|
2014-11-29 19:48:17 -05:00
|
|
|
public void testStrings() {
|
|
|
|
OldExcelExtractor extractor = createExtractor("testEXCEL_4.xls");
|
|
|
|
String text = extractor.getText();
|
|
|
|
|
|
|
|
// Simple strings
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "Table 10 -- Examination Coverage:");
|
|
|
|
assertContains(text, "Recommended and Average Recommended Additional Tax After");
|
|
|
|
assertContains(text, "Individual income tax returns, total");
|
2014-11-29 19:48:17 -05:00
|
|
|
|
|
|
|
// More complicated strings
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "$100,000 or more");
|
|
|
|
assertContains(text, "S corporation returns, Form 1120S [10,15]");
|
2014-11-30 11:21:39 -05:00
|
|
|
assertContains(text, "individual income tax return \u201Cshort forms.\u201D");
|
2014-11-29 19:48:17 -05:00
|
|
|
|
|
|
|
// Formula based strings
|
|
|
|
// TODO Find some then test
|
|
|
|
}
|
|
|
|
|
2014-11-30 09:22:06 -05:00
|
|
|
public void testFormattedNumbersExcel4() {
|
2014-11-29 19:48:17 -05:00
|
|
|
OldExcelExtractor extractor = createExtractor("testEXCEL_4.xls");
|
|
|
|
String text = extractor.getText();
|
|
|
|
|
|
|
|
// Simple numbers
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "151");
|
|
|
|
assertContains(text, "784");
|
2014-11-29 19:48:17 -05:00
|
|
|
|
|
|
|
// Numbers which come from formulas
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "0.398"); // TODO Rounding
|
|
|
|
assertContains(text, "624");
|
2014-11-29 19:48:17 -05:00
|
|
|
|
|
|
|
// Formatted numbers
|
|
|
|
// TODO
|
2014-11-30 09:36:01 -05:00
|
|
|
// assertContains(text, "55,624");
|
|
|
|
// assertContains(text, "11,743,477");
|
2014-11-29 19:48:17 -05:00
|
|
|
}
|
2014-11-30 10:54:34 -05:00
|
|
|
public void testFormattedNumbersExcel5() {
|
2014-11-30 09:22:06 -05:00
|
|
|
for (String ver : new String[] {"5", "95"}) {
|
|
|
|
OldExcelExtractor extractor = createExtractor("testEXCEL_"+ver+".xls");
|
|
|
|
String text = extractor.getText();
|
|
|
|
|
|
|
|
// Simple numbers
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "1");
|
2014-11-30 09:22:06 -05:00
|
|
|
|
|
|
|
// Numbers which come from formulas
|
2014-11-30 09:36:01 -05:00
|
|
|
assertContains(text, "13");
|
|
|
|
assertContains(text, "169");
|
2014-11-30 10:54:34 -05:00
|
|
|
|
|
|
|
// Formatted numbers
|
|
|
|
// TODO
|
2014-11-30 11:01:28 -05:00
|
|
|
// assertContains(text, "100.00%");
|
|
|
|
// assertContains(text, "155.00%");
|
|
|
|
// assertContains(text, "1,125");
|
|
|
|
// assertContains(text, "189,945");
|
|
|
|
// assertContains(text, "1,234,500");
|
|
|
|
// assertContains(text, "$169.00");
|
|
|
|
// assertContains(text, "$1,253.82");
|
2014-11-30 09:36:01 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public void testFromFile() throws Exception {
|
|
|
|
for (String ver : new String[] {"4", "5", "95"}) {
|
|
|
|
String filename = "testEXCEL_"+ver+".xls";
|
|
|
|
File f = HSSFTestDataSamples.getSampleFile(filename);
|
|
|
|
|
|
|
|
OldExcelExtractor extractor = new OldExcelExtractor(f);
|
|
|
|
String text = extractor.getText();
|
|
|
|
assertNotNull(text);
|
|
|
|
assertTrue(text.length() > 100);
|
2014-11-30 09:22:06 -05:00
|
|
|
}
|
|
|
|
}
|
2014-11-29 19:16:23 -05:00
|
|
|
}
|