Implement an Excel text extractor, and put all the existing text extractors under a common superclass, so they're easier to find and use
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@589224 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e145f417b7
commit
9d3658e72f
49
src/java/org/apache/poi/POITextExtractor.java
Normal file
49
src/java/org/apache/poi/POITextExtractor.java
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Common Parent for Text Extractors
|
||||||
|
* of POI Documents.
|
||||||
|
* You will typically find the implementation of
|
||||||
|
* a given format's text extractor under
|
||||||
|
* org.apache.poi.[format].extractor .
|
||||||
|
* @see org.apache.poi.hssf.extractor.ExcelExtractor
|
||||||
|
* @see org.apache.poi.hslf.extractor.PowerPointExtractor
|
||||||
|
* @see org.apache.poi.hdgf.extractor.VisioTextExtractor
|
||||||
|
* @see org.apache.poi.hwpf.extractor.WordExtractor
|
||||||
|
*/
|
||||||
|
public abstract class POITextExtractor {
|
||||||
|
/** The POIDocument that's open */
|
||||||
|
protected POIDocument document;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new text extractor for the given document
|
||||||
|
*/
|
||||||
|
public POITextExtractor(POIDocument document) {
|
||||||
|
this.document = document;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves all the text from the document.
|
||||||
|
* How cells, paragraphs etc are separated in the text
|
||||||
|
* is implementation specific - see the javadocs for
|
||||||
|
* a specific project for details.
|
||||||
|
* @return All the text from the document
|
||||||
|
*/
|
||||||
|
public abstract String getText();
|
||||||
|
}
|
144
src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
Normal file
144
src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.hssf.extractor;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.poi.POITextExtractor;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFCell;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFRow;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||||
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A text extractor for Excel files.
|
||||||
|
* Returns the textual content of the file, suitable for
|
||||||
|
* indexing by something like Lucene, but not really
|
||||||
|
* intended for display to the user.
|
||||||
|
* To turn an excel file into a CSV or similar, then see
|
||||||
|
* the XLS2CSVmra example
|
||||||
|
* @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra
|
||||||
|
*/
|
||||||
|
public class ExcelExtractor extends POITextExtractor{
|
||||||
|
private HSSFWorkbook wb;
|
||||||
|
private boolean includeSheetNames = true;
|
||||||
|
private boolean formulasNotResults = false;
|
||||||
|
|
||||||
|
public ExcelExtractor(HSSFWorkbook wb) {
|
||||||
|
super(wb);
|
||||||
|
this.wb = wb;
|
||||||
|
}
|
||||||
|
public ExcelExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
|
this(new HSSFWorkbook(fs));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should sheet names be included? Default is true
|
||||||
|
*/
|
||||||
|
public void setIncludeSheetNames(boolean includeSheetNames) {
|
||||||
|
this.includeSheetNames = includeSheetNames;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Should we return the formula itself, and not
|
||||||
|
* the result it produces? Default is false
|
||||||
|
*/
|
||||||
|
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||||
|
this.formulasNotResults = formulasNotResults;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retreives the text contents of the file
|
||||||
|
*/
|
||||||
|
public String getText() {
|
||||||
|
StringBuffer text = new StringBuffer();
|
||||||
|
|
||||||
|
for(int i=0;i<wb.getNumberOfSheets();i++) {
|
||||||
|
HSSFSheet sheet = wb.getSheetAt(i);
|
||||||
|
if(sheet == null) { continue; }
|
||||||
|
|
||||||
|
if(includeSheetNames) {
|
||||||
|
String name = wb.getSheetName(i);
|
||||||
|
if(name != null) {
|
||||||
|
text.append(name);
|
||||||
|
text.append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int firstRow = sheet.getFirstRowNum();
|
||||||
|
int lastRow = sheet.getLastRowNum();
|
||||||
|
for(int j=firstRow;j<=lastRow;j++) {
|
||||||
|
HSSFRow row = sheet.getRow(j);
|
||||||
|
if(row == null) { continue; }
|
||||||
|
|
||||||
|
// Check each cell in turn
|
||||||
|
int firstCell = row.getFirstCellNum();
|
||||||
|
int lastCell = row.getLastCellNum();
|
||||||
|
for(int k=firstCell;k<lastCell;k++) {
|
||||||
|
HSSFCell cell = row.getCell((short)k);
|
||||||
|
boolean outputContents = false;
|
||||||
|
if(cell == null) { continue; }
|
||||||
|
|
||||||
|
switch(cell.getCellType()) {
|
||||||
|
case HSSFCell.CELL_TYPE_STRING:
|
||||||
|
text.append(cell.getRichStringCellValue().getString());
|
||||||
|
outputContents = true;
|
||||||
|
break;
|
||||||
|
case HSSFCell.CELL_TYPE_NUMERIC:
|
||||||
|
// Note - we don't apply any formatting!
|
||||||
|
text.append(cell.getNumericCellValue());
|
||||||
|
outputContents = true;
|
||||||
|
break;
|
||||||
|
case HSSFCell.CELL_TYPE_BOOLEAN:
|
||||||
|
text.append(cell.getBooleanCellValue());
|
||||||
|
outputContents = true;
|
||||||
|
break;
|
||||||
|
case HSSFCell.CELL_TYPE_FORMULA:
|
||||||
|
if(formulasNotResults) {
|
||||||
|
text.append(cell.getCellFormula());
|
||||||
|
} else {
|
||||||
|
// Try it as a string, if not as a number
|
||||||
|
HSSFRichTextString str =
|
||||||
|
cell.getRichStringCellValue();
|
||||||
|
if(str != null && str.length() > 0) {
|
||||||
|
text.append(str.toString());
|
||||||
|
} else {
|
||||||
|
// Try and treat it as a number
|
||||||
|
double val = cell.getNumericCellValue();
|
||||||
|
text.append(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
outputContents = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output a tab if we're not on the last cell
|
||||||
|
if(outputContents && k < (lastCell-1)) {
|
||||||
|
text.append("\t");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finish off the row
|
||||||
|
text.append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return text.toString();
|
||||||
|
}
|
||||||
|
}
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.hdgf.HDGFDiagram;
|
import org.apache.poi.hdgf.HDGFDiagram;
|
||||||
import org.apache.poi.hdgf.chunks.Chunk.Command;
|
import org.apache.poi.hdgf.chunks.Chunk.Command;
|
||||||
import org.apache.poi.hdgf.streams.ChunkStream;
|
import org.apache.poi.hdgf.streams.ChunkStream;
|
||||||
@ -33,11 +34,12 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|||||||
* Can operate on the command line (outputs to stdout), or
|
* Can operate on the command line (outputs to stdout), or
|
||||||
* can return the text for you (eg for use with Lucene).
|
* can return the text for you (eg for use with Lucene).
|
||||||
*/
|
*/
|
||||||
public class VisioTextExtractor {
|
public class VisioTextExtractor extends POITextExtractor {
|
||||||
private HDGFDiagram hdgf;
|
private HDGFDiagram hdgf;
|
||||||
private POIFSFileSystem fs;
|
private POIFSFileSystem fs;
|
||||||
|
|
||||||
public VisioTextExtractor(HDGFDiagram hdgf) {
|
public VisioTextExtractor(HDGFDiagram hdgf) {
|
||||||
|
super(hdgf);
|
||||||
this.hdgf = hdgf;
|
this.hdgf = hdgf;
|
||||||
}
|
}
|
||||||
public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
|
public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
@ -84,6 +86,8 @@ public class VisioTextExtractor {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the textual contents of the file.
|
* Returns the textual contents of the file.
|
||||||
|
* Each textual object's text will be separated
|
||||||
|
* by a newline
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
|
@ -22,6 +22,8 @@ package org.apache.poi.hslf.extractor;
|
|||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.hslf.*;
|
import org.apache.poi.hslf.*;
|
||||||
import org.apache.poi.hslf.model.*;
|
import org.apache.poi.hslf.model.*;
|
||||||
@ -34,12 +36,12 @@ import org.apache.poi.hslf.usermodel.*;
|
|||||||
* @author Nick Burch
|
* @author Nick Burch
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class PowerPointExtractor
|
public class PowerPointExtractor extends POITextExtractor
|
||||||
{
|
{
|
||||||
private HSLFSlideShow _hslfshow;
|
private HSLFSlideShow _hslfshow;
|
||||||
private SlideShow _show;
|
private SlideShow _show;
|
||||||
private Slide[] _slides;
|
private Slide[] _slides;
|
||||||
private Notes[] _notes;
|
private Notes[] _notes;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic extractor. Returns all the text, and optionally all the notes
|
* Basic extractor. Returns all the text, and optionally all the notes
|
||||||
@ -66,61 +68,50 @@ public class PowerPointExtractor
|
|||||||
ppe.close();
|
ppe.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a PowerPointExtractor, from a file
|
* Creates a PowerPointExtractor, from a file
|
||||||
* @param fileName The name of the file to extract from
|
* @param fileName The name of the file to extract from
|
||||||
*/
|
*/
|
||||||
public PowerPointExtractor(String fileName) throws IOException {
|
public PowerPointExtractor(String fileName) throws IOException {
|
||||||
_hslfshow = new HSLFSlideShow(fileName);
|
this(new FileInputStream(fileName));
|
||||||
_show = new SlideShow(_hslfshow);
|
}
|
||||||
_slides = _show.getSlides();
|
/**
|
||||||
_notes = _show.getNotes();
|
* Creates a PowerPointExtractor, from an Input Stream
|
||||||
}
|
* @param iStream The input stream containing the PowerPoint document
|
||||||
|
*/
|
||||||
|
public PowerPointExtractor(InputStream iStream) throws IOException {
|
||||||
|
this(new POIFSFileSystem(iStream));
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Creates a PowerPointExtractor, from an open POIFSFileSystem
|
||||||
|
* @param fs the POIFSFileSystem containing the PowerPoint document
|
||||||
|
*/
|
||||||
|
public PowerPointExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
|
this(new HSLFSlideShow(fs));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a PowerPointExtractor, from an Input Stream
|
* Creates a PowerPointExtractor, from a HSLFSlideShow
|
||||||
* @param iStream The input stream containing the PowerPoint document
|
* @param ss the HSLFSlideShow to extract text from
|
||||||
*/
|
*/
|
||||||
public PowerPointExtractor(InputStream iStream) throws IOException {
|
public PowerPointExtractor(HSLFSlideShow ss) throws IOException {
|
||||||
_hslfshow = new HSLFSlideShow(iStream);
|
super(ss);
|
||||||
_show = new SlideShow(_hslfshow);
|
_hslfshow = ss;
|
||||||
_slides = _show.getSlides();
|
_show = new SlideShow(_hslfshow);
|
||||||
_notes = _show.getNotes();
|
_slides = _show.getSlides();
|
||||||
}
|
_notes = _show.getNotes();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a PowerPointExtractor, from an open POIFSFileSystem
|
* Shuts down the underlying streams
|
||||||
* @param fs the POIFSFileSystem containing the PowerPoint document
|
*/
|
||||||
*/
|
public void close() throws IOException {
|
||||||
public PowerPointExtractor(POIFSFileSystem fs) throws IOException {
|
_hslfshow.close();
|
||||||
_hslfshow = new HSLFSlideShow(fs);
|
_hslfshow = null;
|
||||||
_show = new SlideShow(_hslfshow);
|
_show = null;
|
||||||
_slides = _show.getSlides();
|
_slides = null;
|
||||||
_notes = _show.getNotes();
|
_notes = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a PowerPointExtractor, from a HSLFSlideShow
|
|
||||||
* @param ss the HSLFSlideShow to extract text from
|
|
||||||
*/
|
|
||||||
public PowerPointExtractor(HSLFSlideShow ss) throws IOException {
|
|
||||||
_hslfshow = ss;
|
|
||||||
_show = new SlideShow(_hslfshow);
|
|
||||||
_slides = _show.getSlides();
|
|
||||||
_notes = _show.getNotes();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Shuts down the underlying streams
|
|
||||||
*/
|
|
||||||
public void close() throws IOException {
|
|
||||||
_hslfshow.close();
|
|
||||||
_hslfshow = null;
|
|
||||||
_show = null;
|
|
||||||
_slides = null;
|
|
||||||
_notes = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -195,4 +186,4 @@ public class PowerPointExtractor
|
|||||||
|
|
||||||
return ret.toString();
|
return ret.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -22,6 +22,7 @@ import java.io.FileInputStream;
|
|||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.poi.POITextExtractor;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.model.TextPiece;
|
import org.apache.poi.hwpf.model.TextPiece;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
@ -36,7 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|||||||
*
|
*
|
||||||
* @author Nick Burch (nick at torchbox dot com)
|
* @author Nick Burch (nick at torchbox dot com)
|
||||||
*/
|
*/
|
||||||
public class WordExtractor {
|
public class WordExtractor extends POITextExtractor {
|
||||||
private POIFSFileSystem fs;
|
private POIFSFileSystem fs;
|
||||||
private HWPFDocument doc;
|
private HWPFDocument doc;
|
||||||
|
|
||||||
@ -62,6 +63,7 @@ public class WordExtractor {
|
|||||||
* @param doc The HWPFDocument to extract from
|
* @param doc The HWPFDocument to extract from
|
||||||
*/
|
*/
|
||||||
public WordExtractor(HWPFDocument doc) throws IOException {
|
public WordExtractor(HWPFDocument doc) throws IOException {
|
||||||
|
super(doc);
|
||||||
this.doc = doc;
|
this.doc = doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,101 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.hssf.extractor;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
|
||||||
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
public class TestExcelExtractor extends TestCase {
|
||||||
|
public void testSimple() throws Exception {
|
||||||
|
String path = System.getProperty("HSSF.testdata.path");
|
||||||
|
FileInputStream fin = new FileInputStream(path + File.separator + "Simple.xls");
|
||||||
|
|
||||||
|
ExcelExtractor extractor = new ExcelExtractor(new POIFSFileSystem(fin));
|
||||||
|
|
||||||
|
assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
|
||||||
|
|
||||||
|
// Now turn off sheet names
|
||||||
|
extractor.setIncludeSheetNames(false);
|
||||||
|
assertEquals("replaceMe\n", extractor.getText());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNumericFormula() throws Exception {
|
||||||
|
String path = System.getProperty("HSSF.testdata.path");
|
||||||
|
FileInputStream fin = new FileInputStream(path + File.separator + "sumifformula.xls");
|
||||||
|
|
||||||
|
ExcelExtractor extractor = new ExcelExtractor(new POIFSFileSystem(fin));
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
"Sheet1\n" +
|
||||||
|
"1000.0\t1.0\t5.0\n" +
|
||||||
|
"2000.0\t2.0\t\n" +
|
||||||
|
"3000.0\t3.0\t\n" +
|
||||||
|
"4000.0\t4.0\t\n" +
|
||||||
|
"5000.0\t5.0\t\n" +
|
||||||
|
"Sheet2\nSheet3\n",
|
||||||
|
extractor.getText()
|
||||||
|
);
|
||||||
|
|
||||||
|
extractor.setFormulasNotResults(true);
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
"Sheet1\n" +
|
||||||
|
"1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
|
||||||
|
"2000.0\t2.0\t\n" +
|
||||||
|
"3000.0\t3.0\t\n" +
|
||||||
|
"4000.0\t4.0\t\n" +
|
||||||
|
"5000.0\t5.0\t\n" +
|
||||||
|
"Sheet2\nSheet3\n",
|
||||||
|
extractor.getText()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void testStringConcat() throws Exception {
|
||||||
|
String path = System.getProperty("HSSF.testdata.path");
|
||||||
|
FileInputStream fin = new FileInputStream(path + File.separator + "SimpleWithFormula.xls");
|
||||||
|
|
||||||
|
ExcelExtractor extractor = new ExcelExtractor(new POIFSFileSystem(fin));
|
||||||
|
|
||||||
|
// Comes out as NaN if treated as a number
|
||||||
|
// And as XYZ if treated as a string
|
||||||
|
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", extractor.getText());
|
||||||
|
|
||||||
|
extractor.setFormulasNotResults(true);
|
||||||
|
|
||||||
|
assertEquals("Sheet1\nreplaceme\nreplaceme\nCONCATENATE(A1,A2)\nSheet2\nSheet3\n", extractor.getText());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testStringFormula() throws Exception {
|
||||||
|
String path = System.getProperty("HSSF.testdata.path");
|
||||||
|
FileInputStream fin = new FileInputStream(path + File.separator + "StringFormulas.xls");
|
||||||
|
|
||||||
|
ExcelExtractor extractor = new ExcelExtractor(new POIFSFileSystem(fin));
|
||||||
|
|
||||||
|
// Comes out as NaN if treated as a number
|
||||||
|
// And as XYZ if treated as a string
|
||||||
|
assertEquals("Sheet1\nXYZ\nSheet2\nSheet3\n", extractor.getText());
|
||||||
|
|
||||||
|
extractor.setFormulasNotResults(true);
|
||||||
|
|
||||||
|
assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor.getText());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user