Formula values for Excel 4 extractor, for TIKA-1490

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642497 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2014-11-30 01:30:43 +00:00
parent 17b050c66f
commit 41ba513d11
3 changed files with 40 additions and 22 deletions

View File

@ -22,7 +22,6 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.OldFormulaRecord;
import org.apache.poi.hssf.record.OldLabelRecord;
@ -42,7 +41,6 @@ import org.apache.poi.ss.usermodel.Cell;
*/
public class OldExcelExtractor {
private InputStream input;
private boolean _includeSheetNames = true;
public OldExcelExtractor(InputStream input) {
this.input = input;
@ -61,13 +59,6 @@ public class OldExcelExtractor {
System.out.println(extractor.getText());
}
/**
 * Controls whether sheet names are included. The flag starts out
 * as {@code true}.
 *
 * @param includeSheetNames {@code true} to include sheet names,
 *                          {@code false} to leave them out
 */
public void setIncludeSheetNames(boolean includeSheetNames) {
    this._includeSheetNames = includeSheetNames;
}
/**
* Retrieves the text contents of the file, as best we can
* for these old file formats
@ -95,32 +86,35 @@ public class OldExcelExtractor {
text.append(sr.getString());
text.append('\n');
break;
// number - 5.71 - TODO Needs format strings
case NumberRecord.sid:
NumberRecord nr = new NumberRecord(ris);
text.append(nr.getValue());
text.append('\n');
handleNumericCell(text, nr.getValue());
break;
case OldFormulaRecord.biff2_sid:
case OldFormulaRecord.biff3_sid:
case OldFormulaRecord.biff4_sid:
OldFormulaRecord fr = new OldFormulaRecord(ris);
// if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
text.append(fr.getValue());
text.append('\n');
// }
if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
handleNumericCell(text, fr.getValue());
}
break;
case RKRecord.sid:
RKRecord rr = new RKRecord(ris);
text.append(rr.getRKNumber());
text.append('\n');
handleNumericCell(text, rr.getRKNumber());
break;
default:
ris.readFully(new byte[ris.remaining()]);
// text.append(" = " + ris.getSid() + " = \n");
}
}
return text.toString();
}
/**
 * Writes one numeric cell value into the output buffer, terminated
 * by a newline.
 *
 * @param text  buffer the extracted text is being accumulated in
 * @param value numeric value to append
 */
protected void handleNumericCell(StringBuffer text, double value) {
    // TODO Need to fetch / use format strings
    text.append(value).append('\n');
}
}

View File

@ -47,7 +47,7 @@ public final class FormulaRecord extends CellRecord {
* Excel encodes the same 8 bytes that would be field_4_value with various NaN
* values that are decoded/encoded by this class.
*/
private static final class SpecialCachedValue {
static final class SpecialCachedValue {
/** deliberately chosen by Excel in order to encode other values within Double NaNs */
private static final long BIT_MARKER = 0xFFFF000000000000L;
private static final int VARIABLE_DATA_LENGTH = 6;

View File

@ -17,6 +17,8 @@
package org.apache.poi.hssf.record;
import org.apache.poi.hssf.record.FormulaRecord.SpecialCachedValue;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.ss.formula.Formula;
import org.apache.poi.ss.formula.ptg.Ptg;
@ -30,6 +32,7 @@ public final class OldFormulaRecord extends OldCellRecord {
public final static short biff4_sid = 0x0406;
public final static short biff5_sid = 0x0006;
private SpecialCachedValue specialCachedValue;
private double field_4_value;
private short field_5_options;
private Formula field_6_parsed_expr;
@ -37,8 +40,15 @@ public final class OldFormulaRecord extends OldCellRecord {
public OldFormulaRecord(RecordInputStream ris) {
super(ris, ris.getSid() == biff2_sid);
// TODO Handle special cached values, for Biff 3+
if (isBiff2()) {
field_4_value = ris.readDouble();
} else {
long valueLongBits = ris.readLong();
specialCachedValue = SpecialCachedValue.create(valueLongBits);
if (specialCachedValue == null) {
field_4_value = Double.longBitsToDouble(valueLongBits);
}
}
if (isBiff2()) {
field_5_options = (short)ris.readUByte();
@ -51,6 +61,20 @@ public final class OldFormulaRecord extends OldCellRecord {
field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
}
/**
 * Reports the type of the cached formula result.
 *
 * @return {@link HSSFCell#CELL_TYPE_NUMERIC} when no special cached
 *         value was decoded for this record, otherwise the value type
 *         reported by the special cached value
 */
public int getCachedResultType() {
    return (specialCachedValue != null)
            ? specialCachedValue.getValueType()
            : HSSFCell.CELL_TYPE_NUMERIC;
}
/**
 * Returns the cached boolean result of the formula.
 *
 * @return the boolean value held by the special cached value
 * @throws IllegalStateException if this record has no special cached
 *         value, which is the case whenever {@link #getCachedResultType()}
 *         falls back to reporting a numeric result
 */
public boolean getCachedBooleanValue() {
    // specialCachedValue is only populated for non-numeric cached results;
    // fail with a meaningful message rather than a bare NullPointerException
    if (specialCachedValue == null) {
        throw new IllegalStateException(
                "Formula has a numeric cached result, not a boolean one");
    }
    return specialCachedValue.getBooleanValue();
}
/**
 * Returns the cached error result of the formula.
 *
 * @return the error code held by the special cached value
 * @throws IllegalStateException if this record has no special cached
 *         value, which is the case whenever {@link #getCachedResultType()}
 *         falls back to reporting a numeric result
 */
public int getCachedErrorValue() {
    // specialCachedValue is only populated for non-numeric cached results;
    // fail with a meaningful message rather than a bare NullPointerException
    if (specialCachedValue == null) {
        throw new IllegalStateException(
                "Formula has a numeric cached result, not an error one");
    }
    return specialCachedValue.getErrorValue();
}
/**
* get the calculated value of the formula
*