new formula eval docs

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353686 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Avik Sengupta 2005-05-19 10:36:06 +00:00
parent 4290d1b472
commit 4a5d185c23
2 changed files with 102 additions and 105 deletions

View File

@ -55,8 +55,7 @@
ValueEval objects which are set into the AreaEval and RefEval (ok, ValueEval objects which are set into the AreaEval and RefEval (ok,
since AreaEval and RefEval are interfaces, the implementations of since AreaEval and RefEval are interfaces, the implementations of
AreaEval and RefEval - but you'll figure all that out from the code)</p> AreaEval and RefEval - but you'll figure all that out from the code)</p>
<p>OperationEvals for the standard operators have been implemented and <p>OperationEvals for the standard operators have been implemented and tested.</p>
basic testing has been done </p>
</section> </section>
<section><title> FunctionEval and FuncVarEval</title> <section><title> FunctionEval and FuncVarEval</title>
<p>FunctionEval is an abstract super class of FuncVarEval. The reason for this is that in the FormulaParser Ptg classes, there are two Ptgs, FuncPtg and FuncVarPtg. In my tests, I did not see FuncPtg being used so there is no corresponding FuncEval right now. But in case the need arises for a FuncVal class, FuncEval and FuncVarEval need to be isolated with a common interface/abstract class, hence FunctionEval.</p> <p>FunctionEval is an abstract super class of FuncVarEval. The reason for this is that in the FormulaParser Ptg classes, there are two Ptgs, FuncPtg and FuncVarPtg. In my tests, I did not see FuncPtg being used so there is no corresponding FuncEval right now. But in case the need arises for a FuncVal class, FuncEval and FuncVarEval need to be isolated with a common interface/abstract class, hence FunctionEval.</p>
@ -65,120 +64,124 @@
</section> </section>
<section><title>Walkthrough of an "evaluate()" implementation.</title> <section><title>Walkthrough of an "evaluate()" implementation.</title>
<p>So here is the fun part - let's walk through the implementation of the excel <p>So here is the fun part - let's walk through the implementation of the excel
function... <strong>AVERAGE()</strong> </p> function... <strong>SQRT()</strong> </p>
<section><title>The Code</title> <section><title>The Code</title>
<source> <source>
public Eval evaluate(Eval[] operands) { public class Sqrt extends NumericFunction {
private static final ValueEvalToNumericXlator NUM_XLATOR =
new ValueEvalToNumericXlator((short)
( ValueEvalToNumericXlator.BOOL_IS_PARSED
| ValueEvalToNumericXlator.EVALUATED_REF_BOOL_IS_PARSED
| ValueEvalToNumericXlator.EVALUATED_REF_STRING_IS_PARSED
| ValueEvalToNumericXlator.REF_BOOL_IS_PARSED
| ValueEvalToNumericXlator.STRING_IS_PARSED
));
protected ValueEvalToNumericXlator getXlator() {
return NUM_XLATOR;
}
public Eval evaluate(Eval[] operands, int srcRow, short srcCol) {
double d = 0; double d = 0;
int count = 0;
ValueEval retval = null; ValueEval retval = null;
for (int i = 0, iSize = operands.length; i &lt; iSize; i++) {
if (operands[i] == null) continue; switch (operands.length) {
if (operands[i] instanceof AreaEval) { default:
AreaEval ap = (AreaEval) operands[i]; retval = ErrorEval.VALUE_INVALID;
Object[] values = ap.getValues(); break;
for (int j = 0, jSize = values.length; j &lt; jSize; j++) { case 1:
if (values[j] == null) continue; ValueEval ve = singleOperandEvaluate(operands[0], srcRow, srcCol);
if (values[j] instanceof NumberEval) { if (ve instanceof NumericValueEval) {
//inside areas, ignore bools NumericValueEval ne = (NumericValueEval) ve;
d += ((NumberEval) values[j]).getNumberValue(); d = ne.getNumberValue();
count++;
} }
else if (values[j] instanceof RefEval) { else if (ve instanceof BlankEval) {
RefEval re = (RefEval) values[j]; // do nothing
ValueEval ve = re.getInnerValueEval();
if (ve != null &amp;&amp; ve instanceof NumberEval) {
d += ((NumberEval) ve).getNumberValue();
count++;
}
}
}
}
else if (operands[i] instanceof NumericValueEval) {
// for direct operands evaluate bools
NumericValueEval np = (NumericValueEval) operands[i];
d += np.getNumberValue();
count++;
}
else if (operands[i] instanceof RefEval) {
RefEval re = (RefEval) operands[i];
ValueEval ve = re.getInnerValueEval();
if (ve instanceof NumberEval) {
//if it is a reference, ignore bools
NumberEval ne = (NumberEval) ve;
d += ne.getNumberValue();
count++;
} }
else {
retval = ErrorEval.NUM_ERROR;
} }
} }
if (retval == null) { if (retval == null) {
retval = (Double.isNaN(d)) ? d = Math.sqrt(d);
(ValueEval) ErrorEval.ERROR_503 : new NumberEval(d/count); retval = (Double.isNaN(d)) ? (ValueEval) ErrorEval.VALUE_INVALID : new NumberEval(d);
} }
return retval; return retval;
}
} }
</source> </source>
</section> </section>
<section><title>Implementation Details</title> <section><title>Implementation Details</title>
<ul> <ul>
<li>The implementation of the AVERAGE function lives in package <li>The first thing to realise is that classes already exist, even for functions that are not yet implemented.
o.a.p.hssf.record.formula.functions named Average.java. Just that they extend from DefaultFunctionImpl whose behaviour is to return an ErrorEval.FUNCTION_NOT_IMPLEMENTED value.</li>
(Every excel function has a corresponding java source file <li>In order to implement SQRT(..), we need to: a. Extend from the correct Abstract super class; b. implement the evaluate(..) method</li>
in the above package) </li> <li>Hence we extend SQRT(..) from the predefined class NumericFunction</li>
<li>If you open the file for a function that's not yet implemented, you will see one un-implemented method: <li>Since SQRT(..) takes a single argument, we verify the length of the operands array; otherwise we set the return value to ErrorEval.VALUE_INVALID</li>
<code>public Eval evaluate(Eval[] operands) {}</code> </li> <li>Next we normalize each operand to a limited set of ValueEval subtypes, specifically, we call the function
<li>Since the excel Average() function can take 1 or more operands, we iterate over all operands that are passed in the evaluate method: <code>singleOperandEvaluate(..)</code> to do conversions of different value eval types to one of: NumericValueEval,
<code>for (int i=0, iSize=operands.length; i&lt;iSize; i++) {...}</code></li> BlankEval and ErrorEval. The conversion logic is configured by a ValueEvalToNumericXlator instance which
<li>inside the loop, you will do the following is returned by the Factory method: <code>getXlator(..)</code> The flags used to create the ValueEvalToNumericXlator
<ol> instance are briefly explained as follows:
<li>Do a null check: <code>if (operands[i] == null) continue;</code></li> BOOL_IS_PARSED means whether this function treats Boolean values as 1,
<li>Figure out the actual subtype of ValueEval that the operands REF_BOOL_IS_PARSED means whether Boolean values in cell references are parsed or not.
implements. The possible types that you will encounter in an So also, EVALUATED_REF_BOOL_IS_PARSED means if the operand was a RefEval that was assigned a
evaluate() are: NumberEval, BoolEval, StringEval, ErrorEval, Boolean value as a result of evaluation of the formula that it contained.
AreaEval, RefEval, BlankEval.</li> eg. SQRT(TRUE) returns 1: This means BOOL_IS_PARSED should be set.
<li>Implement the function. See the next section for some SQRT(A1) returns 1 when A1 has TRUE: This means REF_BOOL_IS_PARSED should be set.
caveats on implementing the Excel semantics. </li> SQRT(A1) returns 1 when A1 has a formula that evaluates to TRUE: This means EVALUATED_REF_BOOL_IS_PARSED should be set.
</ol> If the flag is not set for a particular case, that case is ignored (treated as if the cell is blank) _unless_
there is a flag like: STRING_IS_INVALID_VALUE (which means that Strings should be treated as resulting in VALUE_INVALID ErrorEval)
</li> </li>
<li>Next perform the appropriate Math function on the double value (if an error didn't occur already).</li>
<li>Finally before returning the NumberEval wrapping the double value that <li>Finally before returning the NumberEval wrapping the double value that
you computed, do one final check to see if the double is a NaN, you computed, do one final check to see if the double is a NaN, (or if it is "Infinite")
if it is return ErrorEval.ERROR_503 (see the javadoc in ErrorEval.java If it is return the appropriate ErrorEval instance. Note: The OpenOffice.org error codes
for description of error codes - it is html so you might as well should NOT be preferred. Instead use the excel specific error codes like VALUE_INVALID, NUM_ERROR, DIV_ZERO etc.
generate the javadocs)</li> (Thanks to Avik for bringing this issue up early!) The Oo.o ErrorCodes will be removed (if they haven't already been :)</li>
</ul> </ul>
</section> </section>
<section><title>Modelling Excel Semantics</title> <section><title>Modelling Excel Semantics</title>
<p>Strings are ignored. Booleans are ignored!!! (damn Oo.o! I was almost misled here - nevermind). Actually here's the info on Bools: <p>Strings are ignored. Booleans are ignored!!!. Actually here's the info on Bools:
if you have formula: "=TRUE+1", it evaluates to 2. if you have formula: "=TRUE+1", it evaluates to 2.
So also, when you use TRUE like this: "=SUM(1,TRUE)", you see the result is: 2. So also, when you use TRUE like this: "=SUM(1,TRUE)", you see the result is: 2.
So TRUE means 1 when doing numeric calculations, right? So TRUE means 1 when doing numeric calculations, right?
Wrong! Wrong!
Because when you use TRUE in referenced cells with arithmetic functions, it evaluates to blank - meaning it is not evaluated - as if it was string or a blank cell. Because when you use TRUE in referenced cells with arithmetic functions, it evaluates to blank - meaning it is not evaluated - as if it was string or a blank cell.
eg. "=SUM(1,A1)" when A1 is TRUE evaluates to 1. eg. "=SUM(1,A1)" when A1 is TRUE evaluates to 1.
So you have to do this kind of check for every possible data type as a function argument for any function before you understand the behaviour of the function. The operands can be entered in excel as comma separated or as a region specified like: A2:D4. Regions are treated as a single token by the parser hence we have AreaEval which stores the ValueEval at each cell in a region in a 1D array. So in our function if the operand is of type AreaEval we need to get the array of ValueEvals in the region of the AreaEval and iterate over each of them as if each of them were individual operands to the AVERAGE function. This behaviour changes depending on which function you are using. eg. SQRT(..) that was
described earlier treats a TRUE as 1 in all cases. This is why the configurable ValueEvalToNumericXlator
class had to be written.
</p> </p>
<p>Thus, since sometimes, Excel treats <p>Note that when you are extending from an abstract function class like
Booleans as the numbers 0 and 1 (for F and T respectively). NumericFunction (rather than implementing the interface o.a.p.hssf.record.formula.eval.Function directly)
Hence BoolEval and NumberEval both implement a common interface: you can use the utility methods in the super class - singleOperandEvaluate(..) - to quickly
NumericValueEval (since numbers and bools are also valid string reduce the different ValueEval subtypes to a small set of possible types. However when
values, they also implement StringValueEval interface which is implementing the Function interface directly, you will have to handle the possibility
also implemented by StringEval).</p> of all different ValueEval subtypes being sent in as 'operands'. (Hard to put this in
<p> words, please have a look at the code for NumericFunction for an example of
The ValueEval inside an AreaEval can be one of: how/why different ValueEvals need to be handled)
NumberEval, BoolEval, StringEval, ErrorEval, BlankEval.
So you must handle each of these cases.
Similarly, RefEvals have a property: innerValueEval that returns the ValueEval at the referenced cell. The ValueEval inside a RefEval can be one of: NumberEval, BoolEval, StringEval, ErrorEval, BlankEval. So you must handle each of these cases - see how excel treats each one of them.
</p> </p>
</section> </section>
</section> </section>
<section><title>Testing Framework</title> <section><title>Testing Framework</title>
<fixme author="AD">TODO! FormulaEval comes with a testing framework, where you add <p>Automated testing of the implemented Function is easy.
formula's and their expected values to an Excel sheet, and the test code The source code for this is in the file: o.a.p.h.record.formula.GenericFormulaTestCase.java
automatically validates them. Since this is still in flux, the docs This class has a reference to the test xls file (not /a/ test xls, /the/ test xls :)
will be put online once the system is stable </fixme> which may need to be changed for your environment. Once you do that, in the test xls,
locate the entry for the function that you have implemented and enter different tests
in a cell in the FORMULA row. Then copy the "value of" the formula that you entered in the
cell just below it (this is easily done in excel as:
[copy the formula cell] > [go to cell below] > Edit > Paste Special > Values > "ok").
You can enter multiple such formulas and paste their values in the cell below and the
test framework will automatically test if the formula evaluation matches the expected
value (Again, hard to put in words, so if you will, please take time to quickly look
at the code and the currently entered tests in the patch attachment "FormulaEvalTestData.xls"
file).
</p>
</section> </section>
</body> </body>
</document> </document>

View File

@ -23,7 +23,7 @@
</section> </section>
<section><title>Status</title> <section><title>Status</title>
<p> The code currently provides implementations for all the arithmetic operators. <p> The code currently provides implementations for all the arithmetic operators.
It also provides implementations for about 30 built in It also provides implementations for approx. 20 built in
functions in Excel. The framework however makes it easy to add functions in Excel. The framework however makes it easy to add
implementation of new functions. See the <link href="eval-devguide.html"> Formula implementation of new functions. See the <link href="eval-devguide.html"> Formula
evaluation development guide</link> for details. </p> evaluation development guide</link> for details. </p>
@ -47,19 +47,17 @@ HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, wb);
CellReference cellReference = new CellReference("B3"); CellReference cellReference = new CellReference("B3");
HSSFRow row = sheet.getRow(cellReference.getRow()); HSSFRow row = sheet.getRow(cellReference.getRow());
HSSFCell cell = row.getCell(cellReference.getCol()); HSSFCell cell = row.getCell(cellReference.getCol());
String formulaString = c.getCellFormula(); HSSFFormulaEvaluator.CellValue cellValue = evaluator.evaluate(cell);
HSSFFormulaEvaluator.CellValue cellValue =
evaluator.evaluate(formulaString);
switch (cellValue.getCellType()) { switch (cellValue.getCellType()) {
case HSSFCell.CELL_TYPE_BOOLEAN: case HSSFCell.CELL_TYPE_BOOLEAN:
System.out.println(cellValue.getBooleanCellValue()); System.out.println(cellValue.getBooleanValue());
break; break;
case HSSFCell.CELL_TYPE_NUMERIC: case HSSFCell.CELL_TYPE_NUMERIC:
System.out.println(cellValue.getNumberCellValue()); System.out.println(cellValue.getNumberValue());
break; break;
case HSSFCell.CELL_TYPE_STRING: case HSSFCell.CELL_TYPE_STRING:
System.out.println(cellValue.getStringCellValue()); System.out.println(cellValue.getStringValue());
break; break;
case HSSFCell.CELL_TYPE_BLANK: case HSSFCell.CELL_TYPE_BLANK:
break; break;
@ -83,7 +81,7 @@ switch (cellValue.getCellType()) {
<section><title>Using HSSFFormulaEvaluator.<strong>evaluateInCell</strong>(HSSFCell cell) <section><title>Using HSSFFormulaEvaluator.<strong>evaluateInCell</strong>(HSSFCell cell)
</title> </title>
<source> <source>
FileInputStream fis = new FileInputStream("c:/temp/test.xls"); FileInputStream fis = new FileInputStream("/somepath/test.xls");
HSSFWorkbook wb = new HSSFWorkbook(fis); HSSFWorkbook wb = new HSSFWorkbook(fis);
HSSFSheet sheet = wb.getSheetAt(0); HSSFSheet sheet = wb.getSheetAt(0);
HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, wb); HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, wb);
@ -92,7 +90,7 @@ HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, wb);
CellReference cellReference = new CellReference("B3"); CellReference cellReference = new CellReference("B3");
HSSFRow row = sheet.getRow(cellReference.getRow()); HSSFRow row = sheet.getRow(cellReference.getRow());
HSSFCell cell = row.getCell(cellReference.getCol()); HSSFCell cell = row.getCell(cellReference.getCol());
String formulaString = c.getCellFormula();
if (cell!=null) { if (cell!=null) {
switch (<strong>evaluator.evaluateInCell</strong>(cell).getCellType()) { switch (<strong>evaluator.evaluateInCell</strong>(cell).getCellType()) {
@ -121,10 +119,6 @@ if (cell!=null) {
</section> </section>
</section> </section>
<section><title></title>
</section>
<section><title>Performance Notes</title> <section><title>Performance Notes</title>
<ul> <ul>
<li>Generally you should have to create only one HSSFFormulaEvaluator <li>Generally you should have to create only one HSSFFormulaEvaluator