From d683ddaa806ff78ccde77be1236c762b44172fad Mon Sep 17 00:00:00 2001
From: Josh Micich <josh@apache.org>
Date: Mon, 29 Sep 2008 23:12:53 +0000
Subject: [PATCH] Updated formula evaluator documentation due to bugzilla 45768

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@700304 13f79535-47bb-0310-9956-ffa450edef68
---
 .../content/xdocs/hssf/eval-devguide.xml      | 109 +++++----------
 src/documentation/content/xdocs/hssf/eval.xml | 129 +++++++++---------
 .../hssf/usermodel/HSSFFormulaEvaluator.java  |  21 ++-
 3 files changed, 110 insertions(+), 149 deletions(-)
diff --git a/src/documentation/content/xdocs/hssf/eval-devguide.xml b/src/documentation/content/xdocs/hssf/eval-devguide.xml
index 29c33b8a2..558620bfa 100644
--- a/src/documentation/content/xdocs/hssf/eval-devguide.xml
+++ b/src/documentation/content/xdocs/hssf/eval-devguide.xml
@@ -81,84 +81,45 @@
 	</section>
 	<section><title>Walkthrough of an "evaluate()" implementation.</title>
 		<p>So here is the fun part - lets walk through the implementation of the excel 
-			function... <strong>SQRT()</strong> </p>
+			function... <strong>NOT()</strong> </p>
 		<section><title>The Code</title>
 		<source>
-public class Sqrt extends NumericFunction {
-    
-    private static final ValueEvalToNumericXlator NUM_XLATOR = 
-        new ValueEvalToNumericXlator((short)
-                ( ValueEvalToNumericXlator.BOOL_IS_PARSED 
-                | ValueEvalToNumericXlator.EVALUATED_REF_BOOL_IS_PARSED
-                | ValueEvalToNumericXlator.EVALUATED_REF_STRING_IS_PARSED
-                | ValueEvalToNumericXlator.REF_BOOL_IS_PARSED
-                | ValueEvalToNumericXlator.STRING_IS_PARSED
-                ));
-
-    protected ValueEvalToNumericXlator getXlator() {
-        return NUM_XLATOR;
-    }
-
-    public Eval evaluate(Eval[] operands, int srcRow, short srcCol) {
-        double d = 0;
-        ValueEval retval = null;
-        
-        switch (operands.length) {
-        default:
-            retval = ErrorEval.VALUE_INVALID;
-            break;
-        case 1:
-            ValueEval ve = singleOperandEvaluate(operands[0], srcRow, srcCol);
-            if (ve instanceof NumericValueEval) {
-                NumericValueEval ne = (NumericValueEval) ve;
-                d = ne.getNumberValue();
-            }
-            else if (ve instanceof BlankEval) {
-                // do nothing
-            }
-            else {
-                retval = ErrorEval.NUM_ERROR;
-            }
-        }
-        
-        if (retval == null) {
-            d = Math.sqrt(d);
-            retval = (Double.isNaN(d)) ? (ValueEval) ErrorEval.VALUE_INVALID : new NumberEval(d);
-        }
-        return retval;
-    }
+public final class Not implements Function {
 
+	public Eval evaluate(Eval[] args, int srcCellRow, short srcCellCol) {
+		if (args.length != 1) {
+			return ErrorEval.VALUE_INVALID;
+		}
+		boolean boolArgVal;
+		try {
+			ValueEval ve = OperandResolver.getSingleValue(args[0], srcCellRow, srcCellCol);
+			Boolean b = OperandResolver.coerceValueToBoolean(ve, false);
+			boolArgVal = b == null ? false : b.booleanValue();
+		} catch (EvaluationException e) {
+			return e.getErrorEval();
+		}
+		
+		return BoolEval.valueOf(!boolArgVal);
+	}
 }
+
 	</source>
 		</section>
 		<section><title>Implementation Details</title>
 		<ul>
 			<li>The first thing to realise is that classes already exist, even for functions that are not yet implemented.
-			Just that they extend from DefaultFunctionImpl whose behaviour is to return an ErrorEval.FUNCTION_NOT_IMPLEMENTED value.</li>
-			<li>In order to implement SQRT(..), we need to: a. Extend from the correct Abstract super class; b. implement the evaluate(..) method</li>
-			<li>Hence we extend SQRT(..) from the predefined class NumericFunction</li>
-			<li>Since SQRT(..) takes a single argument, we verify the length of the operands array else set the return value to ErrorEval.VALUE_INVALID</li>
+			Just that they extend from NotImplementedFunction whose behaviour is to return an ErrorEval.FUNCTION_NOT_IMPLEMENTED value.</li>
+			<li>In order to implement NOT(..), we need to implement the interface 'Function' which has a method 'evaluate(..)'</li>
+			<li>Since NOT(..) takes a single argument, we verify the length of the operands array else set the return value to ErrorEval.VALUE_INVALID</li>
 			<li>Next we normalize each operand to a limited set of ValueEval subtypes, specifically, we call the function 
-			<code>singleOperandEvaluate(..)</code> to do conversions of different value eval types to one of: NumericValueEval,
-			BlankEval and ErrorEval. The conversion logic is configured by a ValueEvalToNumericXlator instance which
-			is returned by the Factory method: <code>getXlator(..)</code> The flags used to create the ValueEvalToNumericXlator
-			instance are briefly explained as follows:
-			BOOL_IS_PARSED means whether this function treats Boolean values as 1, 
-			REF_BOOL_IS_PARSED means whether Boolean values in cell references are parsed or not.
-			So also, EVALUATED_REF_BOOL_IS_PARSED means if the operand was a RefEval that was assigned a
-			Boolean value as a result of evaluation of the formula that it contained.
-			eg. SQRT(TRUE) returns 1: This means BOOL_IS_PARSED should be set.
-			SQRT(A1) returns 1 when A1 has TRUE: This means REF_BOOL_IS_PARSED should be set.
-			SQRT(A1) returns 1 when A1 has a formula that evaluates to TRUE: This means EVALUATED_REF_BOOL_IS_PARSED should be set.
-			If the flag is not set for a particular case, that case is ignored (treated as if the cell is blank) _unless_
-			there is a flag like: STRING_IS_INVALID_VALUE (which means that Strings should be treated as resulting in VALUE_INVALID ErrorEval)
+			<code>OperandResolver.getSingleValue</code> to do conversions of different value eval types to one of: NumericValueEval,
+			BlankEval and ErrorEval. The conversion logic is performed by OperandResolver.coerceValueToBoolean()
 			</li>
-			<li>Next perform the appropriate Math function on the double value (if an error didnt occur already).</li>
-			<li>Finally before returning the NumberEval wrapping the double value that 
-				you computed, do one final check to see if the double is a NaN, (or if it is "Infinite")
-				If it is return the appropriate ErrorEval instance. Note: The OpenOffice.org error codes
-				should NOT be preferred. Instead use the excel specific error codes like VALUE_INVALID, NUM_ERROR, DIV_ZERO etc. 
-				(Thanks to Avik for bringing this issue up early!) The Oo.o ErrorCodes will be removed (if they havent already been :)</li>
+			<li>Next perform the appropriate java operation (if an error didnt occur already).</li>
+			<li>Finally return the BoolEval wrapping primitive boolean result. Note - in the case of numeric results
+			you should check for NaN and Infinity, because exel likes these translated into specific error codes like
+			VALUE_INVALID, NUM_ERROR, DIV_ZERO etc. 
+				(Thanks to Avik for bringing this issue up early!)</li>
 		</ul>
 		</section>
 		<section><title>Modelling Excel Semantics</title>
@@ -170,8 +131,8 @@ public class Sqrt extends NumericFunction {
 				Because when you use TRUE in referenced cells with arithmetic functions, it evaluates to blank - meaning it is not evaluated - as if it was string or a blank cell. 
 				eg. "=SUM(1,A1)" when A1 is TRUE evaluates to 1.
 				This behaviour changes depending on which function you are using. eg. SQRT(..) that was 
-				described earlier treats a TRUE as 1 in all cases. This is why the configurable ValueEvalToNumericXlator
-				class had to be written.
+				described earlier treats a TRUE as 1 in all cases. The various conversion logic has been refactored into common places like the following classes:
+				OperandResolver, TextFunction, NumericFunction, MultiOperandNumericFunction and FinanceFunction.
 				</p>
 			<p>Note that when you are extending from an abstract function class like
 			NumericFunction (rather than implementing the interface o.a.p.hssf.record.formula.eval.Function directly)
@@ -186,18 +147,16 @@ public class Sqrt extends NumericFunction {
 	</section>
 	<section><title>Testing Framework</title>
 	<p>Automated testing of the implemented Function is easy.
-	The source code for this is in the file: o.a.p.h.record.formula.GenericFormulaTestCase.java
-	This class has a reference to the test xls file (not /a/ test xls, /the/ test xls :)
-	which may need to be changed for your environment. Once you do that, in the test xls,
+	The source code for this is in the file: o.a.p.h.record.formula.TestFormulasFromSpreadsheet.java
+	This class has a reference to the Excel test sample file 'FormulaEvalTestData.xls'. In this file,
 	locate the entry for the function that you have implemented and enter different tests 
 	in a cell in the FORMULA row. Then copy the "value of" the formula that you entered in the
 	cell just below it (this is easily done in excel as: 
 	[copy the formula cell] > [go to cell below] > Edit > Paste Special > Values > "ok").
 	You can enter multiple such formulas and paste their values in the cell below and the
 	test framework will automatically test if the formula evaluation matches the expected
-	value (Again, hard to put in words, so if you will, please take time to quickly look
-	at the code and the currently entered tests in the patch attachment "FormulaEvalTestData.xls" 
-	file).
+	value (Please take time to quickly look at the code and the currently entered tests in the 
+	file "FormulaEvalTestData.xls").
 	</p>	
 	</section>
 	</body>
diff --git a/src/documentation/content/xdocs/hssf/eval.xml b/src/documentation/content/xdocs/hssf/eval.xml
index 32279cc4a..61cb66374 100644
--- a/src/documentation/content/xdocs/hssf/eval.xml
+++ b/src/documentation/content/xdocs/hssf/eval.xml
@@ -20,13 +20,13 @@
 <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">
 
 <document>
-    <header>
-        <title>Formula Evaluation</title>
-        <authors>
+	<header>
+		<title>Formula Evaluation</title>
+		<authors>
 			<person email="amoweb@yahoo.com" name="Amol Deshmukh" id="AD"/>
-        </authors>
-    </header>
-    <body>
+		</authors>
+	</header>
+	<body>
 		<section><title>Introduction</title>
 			<p>The POI formula evaluation code enables you to calculate the result of 
 				formulas in Excels sheets read-in, or created in POI. This document explains
@@ -66,35 +66,34 @@
 FileInputStream fis = new FileInputStream("c:/temp/test.xls");
 HSSFWorkbook wb = new HSSFWorkbook(fis);
 HSSFSheet sheet = wb.getSheetAt(0);
-HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, wb);
+HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
 
 // suppose your formula is in B3
-CellReference cellReference = new CellReference("B3"); 
+CellReference cellReference = new CellReference("B3");
 HSSFRow row = sheet.getRow(cellReference.getRow());
-HSSFCell cell = row.getCell(cellReference.getCol()); 
+HSSFCell cell = row.getCell((int)cellReference.getCol());
 
-evaluator.setCurrentRow(row);
 HSSFFormulaEvaluator.CellValue cellValue = evaluator.evaluate(cell);
 
 switch (cellValue.getCellType()) {
 	case HSSFCell.CELL_TYPE_BOOLEAN:
-    	System.out.println(cellValue.getBooleanValue());
-    	break;
+		System.out.println(cellValue.getBooleanValue());
+		break;
 	case HSSFCell.CELL_TYPE_NUMERIC:
-    	System.out.println(cellValue.getNumberValue());
-    	break;
+		System.out.println(cellValue.getNumberValue());
+		break;
 	case HSSFCell.CELL_TYPE_STRING:
-    	System.out.println(cellValue.getStringValue());
-    	break;
+		System.out.println(cellValue.getStringValue());
+		break;
 	case HSSFCell.CELL_TYPE_BLANK:
-    	break;
+		break;
 	case HSSFCell.CELL_TYPE_ERROR:
-    	break;
+		break;
 
 	// CELL_TYPE_FORMULA will never happen
-	case HSSFCell.CELL_TYPE_FORMULA: 
-    	break;
-}				
+	case HSSFCell.CELL_TYPE_FORMULA:
+		break;
+}			
 				</source>
 				<p>Thus using the retrieved value (of type 
 					HSSFFormulaEvaluator.CellValue - a nested class) returned 
@@ -119,36 +118,42 @@ switch (cellValue.getCellType()) {
 FileInputStream fis = new FileInputStream("/somepath/test.xls");
 HSSFWorkbook wb = new HSSFWorkbook(fis);
 HSSFSheet sheet = wb.getSheetAt(0);
-HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, wb);
+HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
 
 // suppose your formula is in B3
-CellReference cellReference = new CellReference("B3"); 
+CellReference cellReference = new CellReference("B3");
 HSSFRow row = sheet.getRow(cellReference.getRow());
-HSSFCell cell = row.getCell(cellReference.getCol()); 
-evaluator.setCurrentRow(row);
+HSSFCell cell = row.getCell((int)cellReference.getCol());
 
 if (cell!=null) {
-	switch (<strong>evaluator.evaluateFormulaCell</strong>(cell)) {
+	int valueType = evaluator.evaluateFormulaCell(cell);
+	if (valueType == -1) {
+		// Cell was not a formula cell
+		// but we can read the plain value from the cell just the same as a formula result
+		valueType = cell.getCellType();
+	}
+	switch (valueType) {
 		case HSSFCell.CELL_TYPE_BOOLEAN:
-		    System.out.println(cell.getBooleanCellValue());
-		    break;
+			System.out.println(cell.getBooleanCellValue());
+			break;
 		case HSSFCell.CELL_TYPE_NUMERIC:
-		    System.out.println(cell.getNumberCellValue());
-		    break;
+			System.out.println(cell.getNumericCellValue());
+			break;
 		case HSSFCell.CELL_TYPE_STRING:
-		    System.out.println(cell.getStringCellValue());
-		    break;
+			System.out.println(cell.getRichStringCellValue().getString());
+			break;
 		case HSSFCell.CELL_TYPE_BLANK:
-		    break;
+			break;
 		case HSSFCell.CELL_TYPE_ERROR:
-		    System.out.println(cell.getErrorCellValue());
-		    break;
-		
-		// CELL_TYPE_FORMULA will never occur
-		case HSSFCell.CELL_TYPE_FORMULA: 
-		    break;
+			System.out.println(HSSFErrorConstants.getText(cell.getErrorCellValue()));
+			break;
+
+		case HSSFCell.CELL_TYPE_FORMULA:
+			throw new IllegalStateException("Result-type 'formula' cannot occur");
 	}
 }
+
+
 				</source>
 			</section>
 
@@ -163,36 +168,36 @@ if (cell!=null) {
 FileInputStream fis = new FileInputStream("/somepath/test.xls");
 HSSFWorkbook wb = new HSSFWorkbook(fis);
 HSSFSheet sheet = wb.getSheetAt(0);
-HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, wb);
+HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
 
 // suppose your formula is in B3
-CellReference cellReference = new CellReference("B3"); 
+CellReference cellReference = new CellReference("B3");
 HSSFRow row = sheet.getRow(cellReference.getRow());
-HSSFCell cell = row.getCell(cellReference.getCol()); 
-evaluator.setCurrentRow(row);
+HSSFCell cell = row.getCell((int)cellReference.getCol());
 
 if (cell!=null) {
-	switch (<strong>evaluator.evaluateInCell</strong>(cell).getCellType()) {
+	switch (evaluator.evaluateInCell(cell).getCellType()) {
 		case HSSFCell.CELL_TYPE_BOOLEAN:
-		    System.out.println(cell.getBooleanCellValue());
-		    break;
+			System.out.println(cell.getBooleanCellValue());
+			break;
 		case HSSFCell.CELL_TYPE_NUMERIC:
-		    System.out.println(cell.getNumberCellValue());
-		    break;
+			System.out.println(cell.getNumericCellValue());
+			break;
 		case HSSFCell.CELL_TYPE_STRING:
-		    System.out.println(cell.getStringCellValue());
-		    break;
+			System.out.println(cell.getRichStringCellValue().getString());
+			break;
 		case HSSFCell.CELL_TYPE_BLANK:
-		    break;
+			break;
 		case HSSFCell.CELL_TYPE_ERROR:
-		    System.out.println(cell.getErrorCellValue());
-		    break;
-		
+			System.out.println(cell.getErrorCellValue());
+			break;
+
 		// CELL_TYPE_FORMULA will never occur
-		case HSSFCell.CELL_TYPE_FORMULA: 
-		    break;
+		case HSSFCell.CELL_TYPE_FORMULA:
+			break;
 	}
 }
+
 				</source>
 			</section>
 
@@ -201,13 +206,12 @@ if (cell!=null) {
 				<source>
 FileInputStream fis = new FileInputStream("/somepath/test.xls");
 HSSFWorkbook wb = new HSSFWorkbook(fis);
-for(int sheetNum = 0; sheetNum &lt; wb.getNumberOfSheets(); sheetNum++) {
+HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
+for(int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) {
 	HSSFSheet sheet = wb.getSheetAt(sheetNum);
-	HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(sheet, wb);
 
 	for(Iterator rit = sheet.rowIterator(); rit.hasNext();) {
 		HSSFRow r = (HSSFRow)rit.next();
-		evaluator.setCurrentRow(r);
 
 		for(Iterator cit = r.cellIterator(); cit.hasNext();) {
 			HSSFCell c = (HSSFCell)cit.next();
@@ -218,6 +222,7 @@ for(int sheetNum = 0; sheetNum &lt; wb.getNumberOfSheets(); sheetNum++) {
 	}
 }
 wb.write(new FileOutputStream("/somepath/changed.xls"));
+
 				</source>
 			</section>
 		</section>
@@ -226,12 +231,12 @@ wb.write(new FileOutputStream("/somepath/changed.xls"));
 		<section><title>Performance Notes</title>
 			<ul>
 				<li>Generally you should have to create only one HSSFFormulaEvaluator 
-					instance per sheet, but there really is no overhead in creating 
-					multiple HSSFFormulaEvaluators per sheet other than that of the 
+					instance per workbook, but there really is no overhead in creating 
+					multiple HSSFFormulaEvaluators per workbook other than that of the 
 					HSSFFormulaEvaluator object creation. 
 				</li>
 				<li>Also note that HSSFFormulaEvaluator maintains a reference to 
-					the sheet and workbook, so ensure that the evaluator instance 
+					the workbook, so ensure that the evaluator instance 
 					is available for garbage collection when you are done with it 
 					(in other words don't maintain long lived reference to 
 					HSSFFormulaEvaluator if you don't really need to - unless 
diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFFormulaEvaluator.java b/src/java/org/apache/poi/hssf/usermodel/HSSFFormulaEvaluator.java
index a44999f0e..a836e6594 100644
--- a/src/java/org/apache/poi/hssf/usermodel/HSSFFormulaEvaluator.java
+++ b/src/java/org/apache/poi/hssf/usermodel/HSSFFormulaEvaluator.java
@@ -144,22 +144,19 @@ public class HSSFFormulaEvaluator {
 
 
 	/**
-	 * If cell contains formula, it evaluates the formula,
-	 *  and saves the result of the formula. The cell
-	 *  remains as a formula cell.
-	 * Else if cell does not contain formula, this method leaves
-	 *  the cell unchanged.
-	 * Note that the type of the formula result is returned,
-	 *  so you know what kind of value is also stored with
-	 *  the formula.
+	 * If cell contains formula, it evaluates the formula, and saves the result of the formula. The
+	 * cell remains as a formula cell. If the cell does not contain formula, this method returns -1
+	 * and leaves the cell unchanged.
+	 * 
+	 * Note that the type of the <em>formula result</em> is returned, so you know what kind of 
+	 * cached formula result is also stored with  the formula.
 	 * <pre>
 	 * int evaluatedCellType = evaluator.evaluateFormulaCell(cell);
 	 * </pre>
-	 * Be aware that your cell will hold both the formula,
-	 *  and the result. If you want the cell replaced with
-	 *  the result of the formula, use {@link #evaluateInCell(HSSFCell)}
+	 * Be aware that your cell will hold both the formula, and the result. If you want the cell 
+	 * replaced with the result of the formula, use {@link #evaluateInCell(HSSFCell)}
 	 * @param cell The cell to evaluate
-	 * @return The type of the formula result (the cell's type remains as HSSFCell.CELL_TYPE_FORMULA however)
+	 * @return -1 for non-formula cells, or the type of the <em>formula result</em>
 	 */
 	public int evaluateFormulaCell(HSSFCell cell) {
 		if (cell == null || cell.getCellType() != HSSFCell.CELL_TYPE_FORMULA) {