Bugzilla 48195 - short-circuit evaluation of IF() and CHOOSE()
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@835994 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a76bd88d9b
commit
861e2bae01
@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.6-beta1" date="2009-??-??">
|
<release version="3.6-beta1" date="2009-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">48195 - short-circuit evaluation of IF() and CHOOSE()</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
|
<action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>
|
<action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action>
|
<action dev="POI-DEVELOPERS" type="fix">48134 - release system resources when using Picture.resize()</action>
|
||||||
|
@ -23,7 +23,6 @@ import org.apache.poi.hssf.record.formula.eval.OperandResolver;
|
|||||||
import org.apache.poi.hssf.record.formula.eval.ValueEval;
|
import org.apache.poi.hssf.record.formula.eval.ValueEval;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @author Josh Micich
|
* @author Josh Micich
|
||||||
*/
|
*/
|
||||||
public final class Choose implements Function {
|
public final class Choose implements Function {
|
||||||
@ -34,8 +33,7 @@ public final class Choose implements Function {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ValueEval ev = OperandResolver.getSingleValue(args[0], srcRowIndex, srcColumnIndex);
|
int ix = evaluateFirstArg(args[0], srcRowIndex, srcColumnIndex);
|
||||||
int ix = OperandResolver.coerceValueToInt(ev);
|
|
||||||
if (ix < 1 || ix >= args.length) {
|
if (ix < 1 || ix >= args.length) {
|
||||||
return ErrorEval.VALUE_INVALID;
|
return ErrorEval.VALUE_INVALID;
|
||||||
}
|
}
|
||||||
@ -44,4 +42,10 @@ public final class Choose implements Function {
|
|||||||
return e.getErrorEval();
|
return e.getErrorEval();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static int evaluateFirstArg(ValueEval arg0, int srcRowIndex, int srcColumnIndex)
|
||||||
|
throws EvaluationException {
|
||||||
|
ValueEval ev = OperandResolver.getSingleValue(arg0, srcRowIndex, srcColumnIndex);
|
||||||
|
return OperandResolver.coerceValueToInt(ev);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,7 +53,7 @@ public final class If implements Function {
|
|||||||
return falseResult;
|
return falseResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, short srcCellCol)
|
public static boolean evaluateFirstArg(ValueEval arg, int srcCellRow, int srcCellCol)
|
||||||
throws EvaluationException {
|
throws EvaluationException {
|
||||||
ValueEval ve = OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol);
|
ValueEval ve = OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol);
|
||||||
Boolean b = OperandResolver.coerceValueToBoolean(ve, false);
|
Boolean b = OperandResolver.coerceValueToBoolean(ve, false);
|
||||||
|
@ -52,6 +52,7 @@ import org.apache.poi.hssf.record.formula.eval.AreaEval;
|
|||||||
import org.apache.poi.hssf.record.formula.eval.BlankEval;
|
import org.apache.poi.hssf.record.formula.eval.BlankEval;
|
||||||
import org.apache.poi.hssf.record.formula.eval.BoolEval;
|
import org.apache.poi.hssf.record.formula.eval.BoolEval;
|
||||||
import org.apache.poi.hssf.record.formula.eval.ErrorEval;
|
import org.apache.poi.hssf.record.formula.eval.ErrorEval;
|
||||||
|
import org.apache.poi.hssf.record.formula.eval.EvaluationException;
|
||||||
import org.apache.poi.hssf.record.formula.eval.MissingArgEval;
|
import org.apache.poi.hssf.record.formula.eval.MissingArgEval;
|
||||||
import org.apache.poi.hssf.record.formula.eval.NameEval;
|
import org.apache.poi.hssf.record.formula.eval.NameEval;
|
||||||
import org.apache.poi.hssf.record.formula.eval.NameXEval;
|
import org.apache.poi.hssf.record.formula.eval.NameXEval;
|
||||||
@ -60,7 +61,9 @@ import org.apache.poi.hssf.record.formula.eval.OperationEval;
|
|||||||
import org.apache.poi.hssf.record.formula.eval.RefEval;
|
import org.apache.poi.hssf.record.formula.eval.RefEval;
|
||||||
import org.apache.poi.hssf.record.formula.eval.StringEval;
|
import org.apache.poi.hssf.record.formula.eval.StringEval;
|
||||||
import org.apache.poi.hssf.record.formula.eval.ValueEval;
|
import org.apache.poi.hssf.record.formula.eval.ValueEval;
|
||||||
|
import org.apache.poi.hssf.record.formula.functions.Choose;
|
||||||
import org.apache.poi.hssf.record.formula.functions.FreeRefFunction;
|
import org.apache.poi.hssf.record.formula.functions.FreeRefFunction;
|
||||||
|
import org.apache.poi.hssf.record.formula.functions.If;
|
||||||
import org.apache.poi.hssf.record.formula.udf.UDFFinder;
|
import org.apache.poi.hssf.record.formula.udf.UDFFinder;
|
||||||
import org.apache.poi.hssf.util.CellReference;
|
import org.apache.poi.hssf.util.CellReference;
|
||||||
import org.apache.poi.ss.formula.CollaboratingWorkbooksEnvironment.WorkbookNotFoundException;
|
import org.apache.poi.ss.formula.CollaboratingWorkbooksEnvironment.WorkbookNotFoundException;
|
||||||
@ -228,7 +231,7 @@ public final class WorkbookEvaluator {
|
|||||||
private ValueEval evaluateAny(EvaluationCell srcCell, int sheetIndex,
|
private ValueEval evaluateAny(EvaluationCell srcCell, int sheetIndex,
|
||||||
int rowIndex, int columnIndex, EvaluationTracker tracker) {
|
int rowIndex, int columnIndex, EvaluationTracker tracker) {
|
||||||
|
|
||||||
// avoid tracking dependencies for cells that have constant definition
|
// avoid tracking dependencies to cells that have constant definition
|
||||||
boolean shouldCellDependencyBeRecorded = _stabilityClassifier == null ? true
|
boolean shouldCellDependencyBeRecorded = _stabilityClassifier == null ? true
|
||||||
: !_stabilityClassifier.isCellFinal(sheetIndex, rowIndex, columnIndex);
|
: !_stabilityClassifier.isCellFinal(sheetIndex, rowIndex, columnIndex);
|
||||||
if (srcCell == null || srcCell.getCellType() != Cell.CELL_TYPE_FORMULA) {
|
if (srcCell == null || srcCell.getCellType() != Cell.CELL_TYPE_FORMULA) {
|
||||||
@ -344,6 +347,66 @@ public final class WorkbookEvaluator {
|
|||||||
byte nArgs = 1; // tAttrSum always has 1 parameter
|
byte nArgs = 1; // tAttrSum always has 1 parameter
|
||||||
ptg = new FuncVarPtg("SUM", nArgs);
|
ptg = new FuncVarPtg("SUM", nArgs);
|
||||||
}
|
}
|
||||||
|
if (attrPtg.isOptimizedChoose()) {
|
||||||
|
ValueEval arg0 = stack.pop();
|
||||||
|
int[] jumpTable = attrPtg.getJumpTable();
|
||||||
|
int dist;
|
||||||
|
int nChoices = jumpTable.length;
|
||||||
|
try {
|
||||||
|
int switchIndex = Choose.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex());
|
||||||
|
if (switchIndex<1 || switchIndex > nChoices) {
|
||||||
|
stack.push(ErrorEval.VALUE_INVALID);
|
||||||
|
dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE)
|
||||||
|
} else {
|
||||||
|
dist = jumpTable[switchIndex-1];
|
||||||
|
}
|
||||||
|
} catch (EvaluationException e) {
|
||||||
|
stack.push(e.getErrorEval());
|
||||||
|
dist = attrPtg.getChooseFuncOffset() + 4; // +4 for tFuncFar(CHOOSE)
|
||||||
|
}
|
||||||
|
// Encoded dist for tAttrChoose includes size of jump table, but
|
||||||
|
// countTokensToBeSkipped() does not (it counts whole tokens).
|
||||||
|
dist -= nChoices*2+2; // subtract jump table size
|
||||||
|
i+= countTokensToBeSkipped(ptgs, i, dist);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (attrPtg.isOptimizedIf()) {
|
||||||
|
ValueEval arg0 = stack.pop();
|
||||||
|
boolean evaluatedPredicate;
|
||||||
|
try {
|
||||||
|
evaluatedPredicate = If.evaluateFirstArg(arg0, ec.getRowIndex(), ec.getColumnIndex());
|
||||||
|
} catch (EvaluationException e) {
|
||||||
|
stack.push(e.getErrorEval());
|
||||||
|
int dist = attrPtg.getData();
|
||||||
|
i+= countTokensToBeSkipped(ptgs, i, dist);
|
||||||
|
attrPtg = (AttrPtg) ptgs[i];
|
||||||
|
dist = attrPtg.getData()+1;
|
||||||
|
i+= countTokensToBeSkipped(ptgs, i, dist);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (evaluatedPredicate) {
|
||||||
|
// nothing to skip - true param folows
|
||||||
|
} else {
|
||||||
|
int dist = attrPtg.getData();
|
||||||
|
i+= countTokensToBeSkipped(ptgs, i, dist);
|
||||||
|
Ptg nextPtg = ptgs[i+1];
|
||||||
|
if (ptgs[i] instanceof AttrPtg && nextPtg instanceof FuncVarPtg) {
|
||||||
|
// this is an if statement without a false param (as opposed to MissingArgPtg as the false param)
|
||||||
|
i++;
|
||||||
|
stack.push(BoolEval.FALSE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (attrPtg.isSkip()) {
|
||||||
|
int dist = attrPtg.getData()+1;
|
||||||
|
i+= countTokensToBeSkipped(ptgs, i, dist);
|
||||||
|
if (stack.peek() == MissingArgEval.instance) {
|
||||||
|
stack.pop();
|
||||||
|
stack.push(BlankEval.INSTANCE);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (ptg instanceof ControlPtg) {
|
if (ptg instanceof ControlPtg) {
|
||||||
// skip Parentheses, Attr, etc
|
// skip Parentheses, Attr, etc
|
||||||
@ -402,6 +465,27 @@ public final class WorkbookEvaluator {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the number of tokens that the evaluator should skip upon reaching a tAttrSkip.
|
||||||
|
*
|
||||||
|
* @return the number of tokens (starting from <tt>startIndex+1</tt>) that need to be skipped
|
||||||
|
* to achieve the specified <tt>distInBytes</tt> skip distance.
|
||||||
|
*/
|
||||||
|
private static int countTokensToBeSkipped(Ptg[] ptgs, int startIndex, int distInBytes) {
|
||||||
|
int remBytes = distInBytes;
|
||||||
|
int index = startIndex;
|
||||||
|
while (remBytes != 0) {
|
||||||
|
index++;
|
||||||
|
remBytes -= ptgs[index].getSize();
|
||||||
|
if (remBytes < 0) {
|
||||||
|
throw new RuntimeException("Bad skip distance (wrong token size calculation).");
|
||||||
|
}
|
||||||
|
if (index >= ptgs.length) {
|
||||||
|
throw new RuntimeException("Skip distance too far (ran out of formula tokens).");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return index-startIndex;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Dereferences a single value from any AreaEval or RefEval evaluation result.
|
* Dereferences a single value from any AreaEval or RefEval evaluation result.
|
||||||
* If the supplied evaluationResult is just a plain value, it is returned as-is.
|
* If the supplied evaluationResult is just a plain value, it is returned as-is.
|
||||||
|
@ -31,6 +31,7 @@ import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate;
|
|||||||
import org.apache.poi.hssf.record.formula.AreaPtg;
|
import org.apache.poi.hssf.record.formula.AreaPtg;
|
||||||
import org.apache.poi.hssf.record.formula.FuncVarPtg;
|
import org.apache.poi.hssf.record.formula.FuncVarPtg;
|
||||||
import org.apache.poi.hssf.record.formula.Ptg;
|
import org.apache.poi.hssf.record.formula.Ptg;
|
||||||
|
import org.apache.poi.hssf.record.formula.eval.ErrorEval;
|
||||||
import org.apache.poi.hssf.record.formula.eval.ValueEval;
|
import org.apache.poi.hssf.record.formula.eval.ValueEval;
|
||||||
import org.apache.poi.ss.formula.EvaluationCell;
|
import org.apache.poi.ss.formula.EvaluationCell;
|
||||||
import org.apache.poi.ss.formula.EvaluationListener;
|
import org.apache.poi.ss.formula.EvaluationListener;
|
||||||
@ -319,6 +320,10 @@ public final class TestFormulaEvaluatorBugs extends TestCase {
|
|||||||
* The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
|
* The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
|
||||||
*/
|
*/
|
||||||
public void testSlowEvaluate45376() {
|
public void testSlowEvaluate45376() {
|
||||||
|
/*
|
||||||
|
* Note - to observe behaviour without caching, disable the call to
|
||||||
|
* updateValue() from FormulaCellCacheEntry.updateFormulaResult().
|
||||||
|
*/
|
||||||
|
|
||||||
// Firstly set up a sequence of formula cells where each depends on the previous multiple
|
// Firstly set up a sequence of formula cells where each depends on the previous multiple
|
||||||
// times. Without caching, each subsequent cell take about 4 times longer to evaluate.
|
// times. Without caching, each subsequent cell take about 4 times longer to evaluate.
|
||||||
@ -330,30 +335,39 @@ public final class TestFormulaEvaluatorBugs extends TestCase {
|
|||||||
char prevCol = (char) ('A' + i-1);
|
char prevCol = (char) ('A' + i-1);
|
||||||
String prevCell = prevCol + "1";
|
String prevCell = prevCol + "1";
|
||||||
// this formula is inspired by the offending formula of the attachment for bug 45376
|
// this formula is inspired by the offending formula of the attachment for bug 45376
|
||||||
|
// IF(DATE(YEAR(A1),MONTH(A1)+1,1)<=$D$3,DATE(YEAR(A1),MONTH(A1)+1,1),NA()) etc
|
||||||
String formula = "IF(DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1)<=$D$3," +
|
String formula = "IF(DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1)<=$D$3," +
|
||||||
"DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1),NA())";
|
"DATE(YEAR(" + prevCell + "),MONTH(" + prevCell + ")+1,1),NA())";
|
||||||
cell.setCellFormula(formula);
|
cell.setCellFormula(formula);
|
||||||
|
|
||||||
}
|
}
|
||||||
Calendar cal = new GregorianCalendar(2000, 0, 1, 0, 0, 0);
|
Calendar cal = new GregorianCalendar(2000, 0, 1, 0, 0, 0);
|
||||||
row.createCell(0).setCellValue(cal);
|
row.createCell(0).setCellValue(cal);
|
||||||
|
|
||||||
// Choose cell A9, so that the failing test case doesn't take too long to execute.
|
// Choose cell A9 instead of A10, so that the failing test case doesn't take too long to execute.
|
||||||
HSSFCell cell = row.getCell(8);
|
HSSFCell cell = row.getCell(8);
|
||||||
EvalListener evalListener = new EvalListener();
|
EvalListener evalListener = new EvalListener();
|
||||||
WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
|
WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
|
||||||
evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell));
|
ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cell));
|
||||||
int evalCount = evalListener.getCountCacheMisses();
|
int evalCount = evalListener.getCountCacheMisses();
|
||||||
if (evalCount > 10) {
|
if (evalCount > 10) {
|
||||||
// Without caching, evaluating cell 'A9' takes 21845 evaluations which consumes
|
// Without caching, evaluating cell 'A9' takes 21845 evaluations which consumes
|
||||||
// much time (~3 sec on Core 2 Duo 2.2GHz)
|
// much time (~3 sec on Core 2 Duo 2.2GHz)
|
||||||
|
// short-circuit-if optimisation cuts this down to 255 evaluations which is still too high
|
||||||
System.err.println("Cell A9 took " + evalCount + " intermediate evaluations");
|
System.err.println("Cell A9 took " + evalCount + " intermediate evaluations");
|
||||||
throw new AssertionFailedError("Identifed bug 45376 - Formula evaluator should cache values");
|
throw new AssertionFailedError("Identifed bug 45376 - Formula evaluator should cache values");
|
||||||
}
|
}
|
||||||
// With caching, the evaluationCount is 8 which is a big improvement
|
// With caching, the evaluationCount is 8 which is exactly the
|
||||||
// Note - these expected values may change if the WorkbookEvaluator is
|
// number of formula cells that needed to be evaluated.
|
||||||
// ever optimised to short circuit 'if' functions.
|
|
||||||
assertEquals(8, evalCount);
|
assertEquals(8, evalCount);
|
||||||
assertEquals(24, evalListener.getCountCacheHits());
|
|
||||||
|
// The cache hits would be 24 if fully evaluating all arguments of the
|
||||||
|
// "IF()" functions (Each of the 8 formulas has 4 refs to formula cells
|
||||||
|
// which result in 1 cache miss and 3 cache hits). However with the
|
||||||
|
// short-circuit-if optimisation, 2 of the cell refs get skipped
|
||||||
|
// reducing this metric 8.
|
||||||
|
assertEquals(8, evalListener.getCountCacheHits());
|
||||||
|
|
||||||
|
// confirm the evaluation result too
|
||||||
|
assertEquals(ErrorEval.NA, ve);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,13 @@ import junit.framework.TestCase;
|
|||||||
|
|
||||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||||
import org.apache.poi.hssf.record.NameRecord;
|
import org.apache.poi.hssf.record.NameRecord;
|
||||||
|
import org.apache.poi.hssf.record.formula.Ptg;
|
||||||
|
import org.apache.poi.hssf.record.formula.eval.NumberEval;
|
||||||
|
import org.apache.poi.hssf.record.formula.eval.ValueEval;
|
||||||
|
import org.apache.poi.ss.formula.EvaluationCell;
|
||||||
|
import org.apache.poi.ss.formula.EvaluationListener;
|
||||||
|
import org.apache.poi.ss.formula.WorkbookEvaluator;
|
||||||
|
import org.apache.poi.ss.formula.WorkbookEvaluatorTestHelper;
|
||||||
import org.apache.poi.ss.usermodel.Cell;
|
import org.apache.poi.ss.usermodel.Cell;
|
||||||
import org.apache.poi.ss.usermodel.CellValue;
|
import org.apache.poi.ss.usermodel.CellValue;
|
||||||
/**
|
/**
|
||||||
@ -167,4 +174,46 @@ public final class TestHSSFFormulaEvaluator extends TestCase {
|
|||||||
assertEquals(Cell.CELL_TYPE_NUMERIC, value.getCellType());
|
assertEquals(Cell.CELL_TYPE_NUMERIC, value.getCellType());
|
||||||
assertEquals(5.33, value.getNumberValue(), 0.0);
|
assertEquals(5.33, value.getNumberValue(), 0.0);
|
||||||
}
|
}
|
||||||
|
private static final class EvalCountListener extends EvaluationListener {
|
||||||
|
private int _evalCount;
|
||||||
|
public EvalCountListener() {
|
||||||
|
_evalCount = 0;
|
||||||
|
}
|
||||||
|
public void onStartEvaluate(EvaluationCell cell, ICacheEntry entry, Ptg[] ptgs) {
|
||||||
|
_evalCount++;
|
||||||
|
}
|
||||||
|
public int getEvalCount() {
|
||||||
|
return _evalCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The HSSFFormula evaluator performance benefits greatly from caching of intermediate cell values
|
||||||
|
*/
|
||||||
|
public void testShortCircuitIfEvaluation() {
|
||||||
|
|
||||||
|
// Set up a simple IF() formula that has measurable evaluation cost for its operands.
|
||||||
|
HSSFWorkbook wb = new HSSFWorkbook();
|
||||||
|
HSSFSheet sheet = wb.createSheet("Sheet1");
|
||||||
|
HSSFRow row = sheet.createRow(0);
|
||||||
|
HSSFCell cellA1 = row.createCell(0);
|
||||||
|
cellA1.setCellFormula("if(B1,C1,D1+E1+F1)");
|
||||||
|
// populate cells B1..F1 with simple formulas instead of plain values so we can use
|
||||||
|
// EvaluationListener to check which parts of the first formula get evaluated
|
||||||
|
for (int i=1; i<6; i++) {
|
||||||
|
// formulas are just literal constants "1".."5"
|
||||||
|
row.createCell(i).setCellFormula(String.valueOf(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
EvalCountListener evalListener = new EvalCountListener();
|
||||||
|
WorkbookEvaluator evaluator = WorkbookEvaluatorTestHelper.createEvaluator(wb, evalListener);
|
||||||
|
ValueEval ve = evaluator.evaluate(HSSFEvaluationTestHelper.wrapCell(cellA1));
|
||||||
|
int evalCount = evalListener.getEvalCount();
|
||||||
|
if (evalCount == 6) {
|
||||||
|
// Without short-circuit-if evaluation, evaluating cell 'A1' takes 3 extra evaluations (for D1,E1,F1)
|
||||||
|
throw new AssertionFailedError("Identifed bug 48195 - Formula evaluator should short-circuit IF() calculations.");
|
||||||
|
}
|
||||||
|
assertEquals(3, evalCount);
|
||||||
|
assertEquals(2.0, ((NumberEval)ve).getNumberValue(), 0D);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user