Changes to formula evaluation allowing for reduced memory usage

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@713811 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-11-13 20:22:17 +00:00
parent 7a98ca6d17
commit 9873ea6391
10 changed files with 395 additions and 265 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.5-beta4" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">Changes to formula evaluation allowing for reduced memory usage</action>
<action dev="POI-DEVELOPERS" type="fix">45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list</action>
<action dev="POI-DEVELOPERS" type="fix">46184 - More odd escaped date formats</action>
<action dev="POI-DEVELOPERS" type="add">Include the sheet number in the output of XLS2CSVmra</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5-beta4" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="add">Changes to formula evaluation allowing for reduced memory usage</action>
<action dev="POI-DEVELOPERS" type="fix">45290 - Support odd files where the POIFS header block comes after the data blocks, and is on the data blocks list</action>
<action dev="POI-DEVELOPERS" type="fix">46184 - More odd escaped date formats</action>
<action dev="POI-DEVELOPERS" type="add">Include the sheet number in the output of XLS2CSVmra</action>

View File

@ -25,6 +25,7 @@ import org.apache.poi.hssf.record.formula.eval.NumberEval;
import org.apache.poi.hssf.record.formula.eval.StringEval;
import org.apache.poi.hssf.record.formula.eval.ValueEval;
import org.apache.poi.ss.formula.CollaboratingWorkbooksEnvironment;
import org.apache.poi.ss.formula.IStabilityClassifier;
import org.apache.poi.ss.formula.WorkbookEvaluator;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellValue;
@ -54,7 +55,15 @@ public class HSSFFormulaEvaluator implements FormulaEvaluator {
}
}
public HSSFFormulaEvaluator(HSSFWorkbook workbook) {
_bookEvaluator = new WorkbookEvaluator(HSSFEvaluationWorkbook.create(workbook));
this(workbook, null);
}
/**
 * Creates a formula evaluator for the supplied workbook, optionally guided by a
 * stability classifier.
 *
 * @param workbook the workbook that is wrapped (via <code>HSSFEvaluationWorkbook.create</code>)
 * and handed to the underlying <code>WorkbookEvaluator</code>
 * @param stabilityClassifier used to optimise caching performance. Pass <code>null</code>
 * for the (conservative) assumption that any cell may have its definition changed after
 * evaluation begins.
 */
public HSSFFormulaEvaluator(HSSFWorkbook workbook, IStabilityClassifier stabilityClassifier) {
    HSSFEvaluationWorkbook evaluationWorkbook = HSSFEvaluationWorkbook.create(workbook);
    _bookEvaluator = new WorkbookEvaluator(evaluationWorkbook, stabilityClassifier);
}
/**
@ -197,8 +206,7 @@ public class HSSFFormulaEvaluator implements FormulaEvaluator {
* </pre>
* Be aware that your cell value will be changed to hold the
* result of the formula. If you simply want the formula
* value computed for you, use {@link #evaluateFormulaCell(org.apache.poi.ss.usermodel.Cell)}}
* @param cell
* value computed for you, use {@link #evaluateFormulaCell(Cell)}
*/
public HSSFCell evaluateInCell(Cell cell) {
if (cell == null) {

View File

@ -46,6 +46,15 @@ final class FormulaCellCacheEntry extends CellCacheEntry {
}
/**
 * Reports whether this entry has recorded any inputs: either at least one sensitive
 * input cell, or a used-blank-cell group that is not empty.
 *
 * @return <code>true</code> if <code>_sensitiveInputCells</code> is non-null and non-empty,
 * or if <code>_usedBlankCellGroup</code> is non-null and not empty; <code>false</code> otherwise
 */
public boolean isInputSensitive() {
    boolean hasSensitiveInputs = _sensitiveInputCells != null && _sensitiveInputCells.length > 0;
    if (hasSensitiveInputs) {
        return true;
    }
    // no sensitive input cells recorded - fall back to the used-blank-cell group
    return _usedBlankCellGroup != null && !_usedBlankCellGroup.isEmpty();
}
public void setSensitiveInputCells(CellCacheEntry[] sensitiveInputCells) {
// need to tell all cells that were previously used, but no longer are,
// that they are not consumed by this cell any more

View File

@ -186,4 +186,8 @@ final class FormulaUsedBlankCellSet {
}
return bcsg.containsCell(rowIndex, columnIndex);
}
/**
 * @return <code>true</code> if <code>_sheetGroupsByBookSheet</code> reports itself as empty
 */
public boolean isEmpty() {
    final boolean noSheetGroups = _sheetGroupsByBookSheet.isEmpty();
    return noSheetGroups;
}
}

View File

@ -0,0 +1,84 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.ss.formula;
import org.apache.poi.hssf.usermodel.HSSFFormulaEvaluator;
/**
* Used to help optimise cell evaluation result caching by allowing applications to specify which
* parts of a workbook are <em>final</em>.<br/>
* The term <b>final</b> is introduced here to denote immutability or 'having constant definition'.
* This classification refers to potential actions (on the evaluated workbook) by the evaluating
* application. It does not refer to operations performed by the evaluator ({@link
* WorkbookEvaluator}).<br/>
* <br/>
* <b>General guidelines</b>:
* <ul>
* <li>a plain value cell can be marked as 'final' if it will not be changed after the first call
* to {@link WorkbookEvaluator#evaluate(EvaluationCell)}.
* </li>
* <li>a formula cell can be marked as 'final' if its formula will not be changed after the first
* call to {@link WorkbookEvaluator#evaluate(EvaluationCell)}. This remains true even if changes
* in dependent values may cause the evaluated value to change.</li>
* <li>plain value cells should be marked as 'not final' if their plain value may change.
* </li>
* <li>formula cells should be marked as 'not final' if their formula definition may change.</li>
* <li>cells which may switch between plain value and formula should also be marked as 'not final'.
* </li>
* </ul>
* <b>Notes</b>:
* <ul>
* <li>If none of the spreadsheet cells is expected to have its definition changed after evaluation
* begins, every cell can be marked as 'final'. This is the most efficient / least resource
* intensive option.</li>
* <li>To retain freedom to change any cell definition at any time, an application may classify all
* cells as 'not final'. This freedom comes at the expense of greater memory consumption.</li>
* <li>For the purpose of these classifications, setting the cached formula result of a cell (for
* example in {@link HSSFFormulaEvaluator#evaluateFormulaCell(org.apache.poi.ss.usermodel.Cell)})
* does not constitute changing the definition of the cell.</li>
* <li>Updating cells which have been classified as 'final' will cause the evaluator to behave
* unpredictably (typically ignoring the update).</li>
* </ul>
*
* @author Josh Micich
*/
public interface IStabilityClassifier {

    /**
     * Tells the evaluator whether the definition (plain value or formula) of the given cell
     * is fixed, i.e. not expected to be modified between calls to the evaluator. (Note - this
     * is an independent concept from whether a formula cell's evaluated value may change
     * during successive calls to the evaluator).
     *
     * @param sheetIndex zero based index into workbook sheet list
     * @param rowIndex zero based row index of cell
     * @param columnIndex zero based column index of cell
     * @return <code>false</code> if the evaluating application may need to modify the specified
     * cell between calls to the evaluator.
     */
    boolean isCellFinal(int sheetIndex, int rowIndex, int columnIndex);

    /**
     * Ready-made classifier for the case where no cell definition changes once evaluation
     * has begun: it answers <code>true</code> for every cell.
     */
    IStabilityClassifier TOTALLY_IMMUTABLE = new IStabilityClassifier() {
        public boolean isCellFinal(int sheetIndex, int rowIndex, int columnIndex) {
            // the coordinates are deliberately ignored - every cell counts as final
            return true;
        }
    };
}

View File

@ -82,19 +82,22 @@ public final class WorkbookEvaluator {
private int _workbookIx;
private final IEvaluationListener _evaluationListener;
private final Map _sheetIndexesBySheet;
private final Map<EvaluationSheet, Integer> _sheetIndexesBySheet;
private CollaboratingWorkbooksEnvironment _collaboratingWorkbookEnvironment;
private final IStabilityClassifier _stabilityClassifier;
public WorkbookEvaluator(EvaluationWorkbook workbook) {
this (workbook, null);
public WorkbookEvaluator(EvaluationWorkbook workbook, IStabilityClassifier stabilityClassifier) {
this (workbook, null, stabilityClassifier);
}
/* package */ WorkbookEvaluator(EvaluationWorkbook workbook, IEvaluationListener evaluationListener) {
/* package */ WorkbookEvaluator(EvaluationWorkbook workbook, IEvaluationListener evaluationListener,
IStabilityClassifier stabilityClassifier) {
_workbook = workbook;
_evaluationListener = evaluationListener;
_cache = new EvaluationCache(evaluationListener);
_sheetIndexesBySheet = new IdentityHashMap();
_sheetIndexesBySheet = new IdentityHashMap<EvaluationSheet, Integer>();
_collaboratingWorkbookEnvironment = CollaboratingWorkbooksEnvironment.EMPTY;
_workbookIx = 0;
_stabilityClassifier = stabilityClassifier;
}
/**
@ -158,7 +161,7 @@ public final class WorkbookEvaluator {
}
private int getSheetIndex(EvaluationSheet sheet) {
Integer result = (Integer) _sheetIndexesBySheet.get(sheet);
Integer result = _sheetIndexesBySheet.get(sheet);
if (result == null) {
int sheetIndex = _workbook.getSheetIndex(sheet);
if (sheetIndex < 0) {
@ -182,14 +185,21 @@ public final class WorkbookEvaluator {
private ValueEval evaluateAny(EvaluationCell srcCell, int sheetIndex,
int rowIndex, int columnIndex, EvaluationTracker tracker) {
// avoid tracking dependencies for cells that have constant definition
boolean shouldCellDependencyBeRecorded = _stabilityClassifier == null ? true
: !_stabilityClassifier.isCellFinal(sheetIndex, rowIndex, columnIndex);
if (srcCell == null || srcCell.getCellType() != Cell.CELL_TYPE_FORMULA) {
ValueEval result = getValueFromNonFormulaCell(srcCell);
if (shouldCellDependencyBeRecorded) {
tracker.acceptPlainValueDependency(_workbookIx, sheetIndex, rowIndex, columnIndex, result);
}
return result;
}
FormulaCellCacheEntry cce = _cache.getOrCreateFormulaCellEntry(srcCell);
if (shouldCellDependencyBeRecorded || cce.isInputSensitive()) {
tracker.acceptFormulaDependency(cce);
}
IEvaluationListener evalListener = _evaluationListener;
if (cce.getValue() == null) {
if (!tracker.startEvaluate(cce)) {
@ -252,7 +262,7 @@ public final class WorkbookEvaluator {
// visibility raised for testing
/* package */ ValueEval evaluateFormula(int sheetIndex, int srcRowNum, int srcColNum, Ptg[] ptgs, EvaluationTracker tracker) {
Stack stack = new Stack();
Stack<Eval> stack = new Stack<Eval>();
for (int i = 0, iSize = ptgs.length; i < iSize; i++) {
// since we don't know how to handle these yet :(
@ -289,7 +299,7 @@ public final class WorkbookEvaluator {
// storing the ops in reverse order since they are popping
for (int j = numops - 1; j >= 0; j--) {
Eval p = (Eval) stack.pop();
Eval p = stack.pop();
ops[j] = p;
}
// logDebug("invoke " + operation + " (nAgs=" + numops + ")");
@ -307,7 +317,7 @@ public final class WorkbookEvaluator {
stack.push(opResult);
}
ValueEval value = ((ValueEval) stack.pop());
ValueEval value = (ValueEval) stack.pop();
if (!stack.isEmpty()) {
throw new IllegalStateException("evaluation stack not empty");
}

View File

@ -24,6 +24,7 @@ import org.apache.poi.hssf.record.formula.eval.ErrorEval;
import org.apache.poi.hssf.record.formula.eval.NumberEval;
import org.apache.poi.hssf.record.formula.eval.StringEval;
import org.apache.poi.hssf.record.formula.eval.ValueEval;
import org.apache.poi.ss.formula.IStabilityClassifier;
import org.apache.poi.ss.formula.WorkbookEvaluator;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellValue;
@ -46,7 +47,15 @@ public class XSSFFormulaEvaluator implements FormulaEvaluator {
private WorkbookEvaluator _bookEvaluator;
public XSSFFormulaEvaluator(XSSFWorkbook workbook) {
_bookEvaluator = new WorkbookEvaluator(XSSFEvaluationWorkbook.create(workbook));
this(workbook, null);
}
/**
 * Creates a formula evaluator for the supplied workbook, optionally guided by a
 * stability classifier.
 *
 * @param workbook the workbook that is wrapped (via <code>XSSFEvaluationWorkbook.create</code>)
 * and handed to the underlying <code>WorkbookEvaluator</code>
 * @param stabilityClassifier used to optimise caching performance. Pass <code>null</code>
 * for the (conservative) assumption that any cell may have its definition changed after
 * evaluation begins.
 */
public XSSFFormulaEvaluator(XSSFWorkbook workbook, IStabilityClassifier stabilityClassifier) {
    XSSFEvaluationWorkbook evaluationWorkbook = XSSFEvaluationWorkbook.create(workbook);
    _bookEvaluator = new WorkbookEvaluator(evaluationWorkbook, stabilityClassifier);
}
/**

View File

@ -42,6 +42,10 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook;
*/
public class TestWorkbookEvaluator extends TestCase {
/**
 * Builds the evaluator shared by the tests in this class. Both constructor arguments are
 * passed as <code>null</code>.
 */
private static WorkbookEvaluator createEvaluator() {
    WorkbookEvaluator evaluator = new WorkbookEvaluator(null, null);
    return evaluator;
}
/**
* Make sure that the evaluator can directly handle tAttrSum (instead of relying on re-parsing
* the whole formula which converts tAttrSum to tFuncVar("SUM") )
@ -53,7 +57,7 @@ public class TestWorkbookEvaluator extends TestCase {
AttrPtg.SUM,
};
ValueEval result = new WorkbookEvaluator(null).evaluateFormula(0, 0, 0, ptgs, null);
ValueEval result = createEvaluator().evaluateFormula(0, 0, 0, ptgs, null);
assertEquals(42, ((NumberEval)result).getNumberValue(), 0.0);
}
@ -74,7 +78,7 @@ public class TestWorkbookEvaluator extends TestCase {
ptg,
};
ValueEval result = new WorkbookEvaluator(null).evaluateFormula(0, 0, 0, ptgs, null);
ValueEval result = createEvaluator().evaluateFormula(0, 0, 0, ptgs, null);
assertEquals(ErrorEval.REF_INVALID, result);
}
@ -89,7 +93,7 @@ public class TestWorkbookEvaluator extends TestCase {
AttrPtg.SUM,
};
ValueEval result = new WorkbookEvaluator(null).evaluateFormula(0, 0, 0, ptgs, null);
ValueEval result = createEvaluator().evaluateFormula(0, 0, 0, ptgs, null);
assertEquals(42, ((NumberEval)result).getNumberValue(), 0.0);
}

View File

@ -32,6 +32,6 @@ public final class WorkbookEvaluatorTestHelper {
}
public static WorkbookEvaluator createEvaluator(HSSFWorkbook wb, EvaluationListener listener) {
return new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb), listener);
return new WorkbookEvaluator(HSSFEvaluationWorkbook.create(wb), listener, null);
}
}