Fix for 45060 (and 45041) - Improved token class transformation during formula parsing

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@660828 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-05-28 06:19:31 +00:00
parent 1afa028f34
commit a811443b4b
13 changed files with 834 additions and 327 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.1-final" date="2008-06-??">
<action dev="POI-DEVELOPERS" type="add">45060 - Improved token class transformation during formula parsing</action>
<action dev="POI-DEVELOPERS" type="add">44840 - Improved handling of HSSFObjectData, especially for entries with data held not in POIFS</action>
<action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.1-final" date="2008-06-??">
<action dev="POI-DEVELOPERS" type="add">45060 - Improved token class transformation during formula parsing</action>
<action dev="POI-DEVELOPERS" type="add">44840 - Improved handling of HSSFObjectData, especially for entries with data held not in POIFS</action>
<action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>

View File

@ -18,7 +18,6 @@
package org.apache.poi.hssf.model;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import java.util.regex.Pattern;
@ -61,17 +60,17 @@ public final class FormulaParser {
}
}
public static int FORMULA_TYPE_CELL = 0;
public static int FORMULA_TYPE_SHARED = 1;
public static int FORMULA_TYPE_ARRAY =2;
public static int FORMULA_TYPE_CONDFOMRAT = 3;
public static int FORMULA_TYPE_NAMEDRANGE = 4;
public static final int FORMULA_TYPE_CELL = 0;
public static final int FORMULA_TYPE_SHARED = 1;
public static final int FORMULA_TYPE_ARRAY =2;
public static final int FORMULA_TYPE_CONDFOMRAT = 3;
public static final int FORMULA_TYPE_NAMEDRANGE = 4;
private final String formulaString;
private final int formulaLength;
private int pointer;
private final List tokens = new Stack();
private ParseNode _rootNode;
/**
* Used for spotting if we have a cell reference,
@ -221,14 +220,15 @@ public final class FormulaParser {
return value.length() == 0 ? null : value.toString();
}
/** Parse and Translate a String Identifier */
private Ptg parseIdent() {
String name;
name = GetName();
private ParseNode parseFunctionOrIdentifier() {
String name = GetName();
if (look == '('){
//This is a function
return function(name);
}
return new ParseNode(parseIdentifier(name));
}
private Ptg parseIdentifier(String name) {
if (look == ':' || look == '.') { // this is a AreaReference
GetChar();
@ -287,14 +287,6 @@ public final class FormulaParser {
+ name + "\", but that named range wasn't defined!");
}
/**
* Adds a pointer to the last token to the latest function argument list.
* @param obj
*/
private void addArgumentPointer(List argumentPointers) {
argumentPointers.add(tokens.get(tokens.size()-1));
}
/**
* Note - Excel function names are 'case aware but not case sensitive'. This method may end
* up creating a defined name record in the workbook if the specified name is not an internal
@ -302,58 +294,23 @@ public final class FormulaParser {
*
* @param name case preserved function name (as it was entered/appeared in the formula).
*/
private Ptg function(String name) {
int numArgs =0 ;
private ParseNode function(String name) {
NamePtg nameToken = null;
// Note regarding parameter -
if(!AbstractFunctionPtg.isInternalFunctionName(name)) {
// external functions get a Name token which points to a defined name record
NamePtg nameToken = new NamePtg(name, this.book);
nameToken = new NamePtg(name, this.book);
// in the token tree, the name is more or less the first argument
numArgs++;
tokens.add(nameToken);
}
//average 2 args per function
List argumentPointers = new ArrayList(2);
Match('(');
numArgs += Arguments(argumentPointers);
ParseNode[] args = Arguments();
Match(')');
return getFunction(name, numArgs, argumentPointers);
return getFunction(name, nameToken, args);
}
/**
* Adds the size of all the ptgs after the provided index (inclusive).
* <p>
* Initially used to count a goto
* @param index
* @return int
*/
private int getPtgSize(int index) {
int count = 0;
Iterator ptgIterator = tokens.listIterator(index);
while (ptgIterator.hasNext()) {
Ptg ptg = (Ptg)ptgIterator.next();
count+=ptg.getSize();
}
return count;
}
private int getPtgSize(int start, int end) {
int count = 0;
int index = start;
Iterator ptgIterator = tokens.listIterator(index);
while (ptgIterator.hasNext() && index <= end) {
Ptg ptg = (Ptg)ptgIterator.next();
count+=ptg.getSize();
index++;
}
return count;
}
/**
* Generates the variable function ptg for the formula.
* <p>
@ -362,84 +319,35 @@ public final class FormulaParser {
* @param numArgs
* @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this function
*/
private AbstractFunctionPtg getFunction(String name, int numArgs, List argumentPointers) {
private ParseNode getFunction(String name, NamePtg namePtg, ParseNode[] args) {
boolean isVarArgs;
int funcIx;
FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByName(name.toUpperCase());
int numArgs = args.length;
if(fm == null) {
if (namePtg == null) {
throw new IllegalStateException("NamePtg must be supplied for external functions");
}
// must be external function
isVarArgs = true;
funcIx = FunctionMetadataRegistry.FUNCTION_INDEX_EXTERNAL;
} else {
isVarArgs = !fm.hasFixedArgsLength();
funcIx = fm.getIndex();
validateNumArgs(numArgs, fm);
ParseNode[] allArgs = new ParseNode[numArgs+1];
allArgs[0] = new ParseNode(namePtg);
System.arraycopy(args, 0, allArgs, 1, numArgs);
return new ParseNode(new FuncVarPtg(name, (byte)(numArgs+1)), allArgs);
}
if (namePtg != null) {
throw new IllegalStateException("NamePtg no applicable to internal functions");
}
boolean isVarArgs = !fm.hasFixedArgsLength();
int funcIx = fm.getIndex();
validateNumArgs(args.length, fm);
AbstractFunctionPtg retval;
if(isVarArgs) {
retval = new FuncVarPtg(name, (byte)numArgs);
} else {
retval = new FuncPtg(funcIx);
}
if (!name.equalsIgnoreCase(AbstractFunctionPtg.FUNCTION_NAME_IF)) {
// early return for everything else besides IF()
return retval;
}
AttrPtg ifPtg = new AttrPtg();
ifPtg.setData((short)7); //mirroring excel output
ifPtg.setOptimizedIf(true);
if (argumentPointers.size() != 2 && argumentPointers.size() != 3) {
throw new IllegalArgumentException("["+argumentPointers.size()+"] Arguments Found - An IF formula requires 2 or 3 arguments. IF(CONDITION, TRUE_VALUE, FALSE_VALUE [OPTIONAL]");
}
//Biffview of an IF formula record indicates the attr ptg goes after the condition ptgs and are
//tracked in the argument pointers
//The beginning first argument pointer is the last ptg of the condition
int ifIndex = tokens.indexOf(argumentPointers.get(0))+1;
tokens.add(ifIndex, ifPtg);
//we now need a goto ptgAttr to skip to the end of the formula after a true condition
//the true condition is should be inserted after the last ptg in the first argument
int gotoIndex = tokens.indexOf(argumentPointers.get(1))+1;
AttrPtg goto1Ptg = new AttrPtg();
goto1Ptg.setGoto(true);
tokens.add(gotoIndex, goto1Ptg);
if (numArgs > 2) { //only add false jump if there is a false condition
//second goto to skip past the function ptg
AttrPtg goto2Ptg = new AttrPtg();
goto2Ptg.setGoto(true);
goto2Ptg.setData((short)(retval.getSize()-1));
//Page 472 of the Microsoft Excel Developer's kit states that:
//The b(or w) field specifies the number byes (or words to skip, minus 1
tokens.add(goto2Ptg); //this goes after all the arguments are defined
}
//data portion of the if ptg points to the false subexpression (Page 472 of MS Excel Developer's kit)
//count the number of bytes after the ifPtg to the False Subexpression
//doesn't specify -1 in the documentation
ifPtg.setData((short)(getPtgSize(ifIndex+1, gotoIndex)));
//count all the additional (goto) ptgs but dont count itself
int ptgCount = this.getPtgSize(gotoIndex)-goto1Ptg.getSize()+retval.getSize();
if (ptgCount > Short.MAX_VALUE) {
throw new RuntimeException("Ptg Size exceeds short when being specified for a goto ptg in an if");
}
goto1Ptg.setData((short)(ptgCount-1));
return retval;
return new ParseNode(retval, args);
}
private void validateNumArgs(int numArgs, FunctionMetadata fm) {
@ -470,10 +378,12 @@ public final class FormulaParser {
}
/** get arguments to a function */
private int Arguments(List argumentPointers) {
private ParseNode[] Arguments() {
//average 2 args per function
List temp = new ArrayList(2);
SkipWhite();
if(look == ')') {
return 0;
return ParseNode.EMPTY_ARRAY;
}
boolean missedPrevArg = true;
@ -482,8 +392,7 @@ public final class FormulaParser {
SkipWhite();
if (isArgumentDelimiter(look)) {
if (missedPrevArg) {
tokens.add(new MissingArgPtg());
addArgumentPointer(argumentPointers);
temp.add(new ParseNode(new MissingArgPtg()));
numArgs++;
}
if (look == ')') {
@ -493,8 +402,7 @@ public final class FormulaParser {
missedPrevArg = true;
continue;
}
comparisonExpression();
addArgumentPointer(argumentPointers);
temp.add(comparisonExpression());
numArgs++;
missedPrevArg = false;
SkipWhite();
@ -502,32 +410,34 @@ public final class FormulaParser {
throw expected("',' or ')'");
}
}
return numArgs;
ParseNode[] result = new ParseNode[temp.size()];
temp.toArray(result);
return result;
}
/** Parse and Translate a Math Factor */
private void powerFactor() {
percentFactor();
private ParseNode powerFactor() {
ParseNode result = percentFactor();
while(true) {
SkipWhite();
if(look != '^') {
return;
return result;
}
Match('^');
percentFactor();
tokens.add(new PowerPtg());
ParseNode other = percentFactor();
result = new ParseNode(new PowerPtg(), result, other);
}
}
private void percentFactor() {
tokens.add(parseSimpleFactor());
private ParseNode percentFactor() {
ParseNode result = parseSimpleFactor();
while(true) {
SkipWhite();
if(look != '%') {
return;
return result;
}
Match('%');
tokens.add(new PercentPtg());
result = new ParseNode(new PercentPtg(), result);
}
}
@ -535,32 +445,30 @@ public final class FormulaParser {
/**
* factors (without ^ or % )
*/
private Ptg parseSimpleFactor() {
private ParseNode parseSimpleFactor() {
SkipWhite();
switch(look) {
case '#':
return parseErrorLiteral();
return new ParseNode(parseErrorLiteral());
case '-':
Match('-');
powerFactor();
return new UnaryMinusPtg();
return new ParseNode(new UnaryMinusPtg(), powerFactor());
case '+':
Match('+');
powerFactor();
return new UnaryPlusPtg();
return new ParseNode(new UnaryPlusPtg(), powerFactor());
case '(':
Match('(');
comparisonExpression();
ParseNode inside = comparisonExpression();
Match(')');
return new ParenthesisPtg();
return new ParseNode(new ParenthesisPtg(), inside);
case '"':
return parseStringLiteral();
return new ParseNode(parseStringLiteral());
}
if (IsAlpha(look) || look == '\''){
return parseIdent();
return parseFunctionOrIdentifier();
}
// else - assume number
return parseNumber();
return new ParseNode(parseNumber());
}
@ -716,28 +624,30 @@ public final class FormulaParser {
}
/** Parse and Translate a Math Term */
private void Term() {
powerFactor();
private ParseNode Term() {
ParseNode result = powerFactor();
while(true) {
SkipWhite();
Ptg operator;
switch(look) {
case '*':
Match('*');
powerFactor();
tokens.add(new MultiplyPtg());
continue;
operator = new MultiplyPtg();
break;
case '/':
Match('/');
powerFactor();
tokens.add(new DividePtg());
continue;
operator = new DividePtg();
break;
default:
return result; // finished with Term
}
return; // finished with Term
ParseNode other = powerFactor();
result = new ParseNode(operator, result, other);
}
}
private void comparisonExpression() {
concatExpression();
private ParseNode comparisonExpression() {
ParseNode result = concatExpression();
while (true) {
SkipWhite();
switch(look) {
@ -745,11 +655,11 @@ public final class FormulaParser {
case '>':
case '<':
Ptg comparisonToken = getComparisonToken();
concatExpression();
tokens.add(comparisonToken);
ParseNode other = concatExpression();
result = new ParseNode(comparisonToken, result, other);
continue;
}
return; // finished with predicate expression
return result; // finished with predicate expression
}
}
@ -779,38 +689,41 @@ public final class FormulaParser {
}
private void concatExpression() {
additiveExpression();
private ParseNode concatExpression() {
ParseNode result = additiveExpression();
while (true) {
SkipWhite();
if(look != '&') {
break; // finished with concat expression
}
Match('&');
additiveExpression();
tokens.add(new ConcatPtg());
ParseNode other = additiveExpression();
result = new ParseNode(new ConcatPtg(), result, other);
}
return result;
}
/** Parse and Translate an Expression */
private void additiveExpression() {
Term();
private ParseNode additiveExpression() {
ParseNode result = Term();
while (true) {
SkipWhite();
Ptg operator;
switch(look) {
case '+':
Match('+');
Term();
tokens.add(new AddPtg());
continue;
operator = new AddPtg();
break;
case '-':
Match('-');
Term();
tokens.add(new SubtractPtg());
continue;
operator = new SubtractPtg();
break;
default:
return result; // finished with additive expression
}
return; // finished with additive expression
ParseNode other = Term();
result = new ParseNode(operator, result, other);
}
}
@ -835,7 +748,7 @@ end;
public void parse() {
pointer=0;
GetChar();
comparisonExpression();
_rootNode = comparisonExpression();
if(pointer <= formulaLength) {
String msg = "Unused input [" + formulaString.substring(pointer-1)
@ -858,91 +771,12 @@ end;
}
public Ptg[] getRPNPtg(int formulaType) {
Node node = createTree();
OperandClassTransformer oct = new OperandClassTransformer(formulaType);
// RVA is for 'operand class': 'reference', 'value', 'array'
setRootLevelRVA(node, formulaType);
setParameterRVA(node,formulaType);
return (Ptg[]) tokens.toArray(new Ptg[0]);
oct.transformFormula(_rootNode);
return ParseNode.toTokenArray(_rootNode);
}
private void setRootLevelRVA(Node n, int formulaType) {
//Pg 16, excelfileformat.pdf @ openoffice.org
Ptg p = n.getValue();
if (formulaType == FormulaParser.FORMULA_TYPE_NAMEDRANGE) {
if (p.getDefaultOperandClass() == Ptg.CLASS_REF) {
setClass(n,Ptg.CLASS_REF);
} else {
setClass(n,Ptg.CLASS_ARRAY);
}
} else {
setClass(n,Ptg.CLASS_VALUE);
}
}
private void setParameterRVA(Node n, int formulaType) {
Ptg p = n.getValue();
int numOperands = n.getNumChildren();
if (p instanceof AbstractFunctionPtg) {
for (int i =0;i<numOperands;i++) {
setParameterRVA(n.getChild(i),((AbstractFunctionPtg)p).getParameterClass(i),formulaType);
// if (n.getChild(i).getValue() instanceof AbstractFunctionPtg) {
// setParameterRVA(n.getChild(i),formulaType);
// }
setParameterRVA(n.getChild(i),formulaType);
}
} else {
for (int i =0;i<numOperands;i++) {
setParameterRVA(n.getChild(i),formulaType);
}
}
}
private void setParameterRVA(Node n, int expectedClass,int formulaType) {
Ptg p = n.getValue();
if (expectedClass == Ptg.CLASS_REF) { //pg 15, table 1
if (p.getDefaultOperandClass() == Ptg.CLASS_REF ) {
setClass(n, Ptg.CLASS_REF);
}
if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE) {
if (formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED) {
setClass(n,Ptg.CLASS_VALUE);
} else {
setClass(n,Ptg.CLASS_ARRAY);
}
}
if (p.getDefaultOperandClass() == Ptg.CLASS_ARRAY ) {
setClass(n, Ptg.CLASS_ARRAY);
}
} else if (expectedClass == Ptg.CLASS_VALUE) { //pg 15, table 2
if (formulaType == FORMULA_TYPE_NAMEDRANGE) {
setClass(n,Ptg.CLASS_ARRAY) ;
} else {
setClass(n,Ptg.CLASS_VALUE);
}
} else { //Array class, pg 16.
if (p.getDefaultOperandClass() == Ptg.CLASS_VALUE &&
(formulaType==FORMULA_TYPE_CELL || formulaType == FORMULA_TYPE_SHARED)) {
setClass(n,Ptg.CLASS_VALUE);
} else {
setClass(n,Ptg.CLASS_ARRAY);
}
}
}
private void setClass(Node n, byte theClass) {
Ptg p = n.getValue();
if (p.isBaseToken()) {
return;
}
if (p instanceof AbstractFunctionPtg || !(p instanceof OperationPtg)) {
p.setClass(theClass);
} else {
for (int i =0;i<n.getNumChildren();i++) {
setClass(n.getChild(i),theClass);
}
}
}
/**
* Convenience method which takes in a list then passes it to the
* other toFormulaString signature.
@ -1067,59 +901,4 @@ end;
public String toFormulaString(Ptg[] ptgs) {
return toFormulaString(book, ptgs);
}
/** Create a tree representation of the RPN token array
*used to run the class(RVA) change algo
*/
private Node createTree() {
Stack stack = new Stack();
int numPtgs = tokens.size();
OperationPtg o;
int numOperands;
Node[] operands;
for (int i=0;i<numPtgs;i++) {
if (tokens.get(i) instanceof OperationPtg) {
o = (OperationPtg) tokens.get(i);
numOperands = o.getNumberOfOperands();
operands = new Node[numOperands];
for (int j=0;j<numOperands;j++) {
operands[numOperands-j-1] = (Node) stack.pop();
}
Node result = new Node(o);
result.setChildren(operands);
stack.push(result);
} else {
stack.push(new Node((Ptg)tokens.get(i)));
}
}
return (Node) stack.pop();
}
/** toString on the parser instance returns the RPN ordered list of tokens
* Useful for testing
*/
public String toString() {
StringBuffer buf = new StringBuffer();
for (int i=0;i<tokens.size();i++) {
buf.append( ( (Ptg)tokens.get(i)).toFormulaString(book));
buf.append(' ');
}
return buf.toString();
}
/** Private helper class, used to create a tree representation of the formula*/
private static final class Node {
private Ptg value=null;
private Node[] children=new Node[0];
private int numChild=0;
public Node(Ptg val) {
value = val;
}
public void setChildren(Node[] child) {children = child;numChild=child.length;}
public int getNumChildren() {return numChild;}
public Node getChild(int number) {return children[number];}
public Ptg getValue() {return value;}
}
}

View File

@ -0,0 +1,206 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.model;
import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
import org.apache.poi.hssf.record.formula.ControlPtg;
import org.apache.poi.hssf.record.formula.ValueOperatorPtg;
import org.apache.poi.hssf.record.formula.Ptg;
/**
* This class performs 'operand class' transformation. Non-base tokens are classified into three
* operand classes:
* <ul>
* <li>reference</li>
* <li>value</li>
* <li>array</li>
* </ul>
* <p/>
*
* The final operand class chosen for each token depends on the formula type and the token's place
* in the formula. If POI gets the operand class wrong, Excel <em>may</em> interpret the formula
* incorrectly. This condition is typically manifested as a formula cell that displays as '#VALUE!',
* but resolves correctly when the user presses F2, enter.<p/>
*
* The logic implemented here was partially inspired by the description in
* "OpenOffice.org's Documentation of the Microsoft Excel File Format". The model presented there
* seems to be inconsistent with observed Excel behaviour (These differences have not been fully
* investigated). The implementation in this class has been heavily modified in order to satisfy
* concrete examples of how Excel performs the same logic (see TestRVA).<p/>
*
* Hopefully, as additional important test cases are identified and added to the test suite,
* patterns might become more obvious in this code and allow for simplification.
*
* @author Josh Micich
*/
final class OperandClassTransformer {
private final int _formulaType;
public OperandClassTransformer(int formulaType) {
_formulaType = formulaType;
}
/**
* Traverses the supplied formula parse tree, calling <tt>Ptg.setClass()</tt> for each non-base
* token to set its operand class.
*/
public void transformFormula(ParseNode rootNode) {
byte rootNodeOperandClass;
switch (_formulaType) {
case FormulaParser.FORMULA_TYPE_CELL:
rootNodeOperandClass = Ptg.CLASS_VALUE;
break;
default:
throw new RuntimeException("Incomplete code - formula type ("
+ _formulaType + ") not supported yet");
}
transformNode(rootNode, rootNodeOperandClass, false);
}
private void transformNode(ParseNode node, byte desiredOperandClass,
boolean callerForceArrayFlag) {
Ptg token = node.getToken();
ParseNode[] children = node.getChildren();
if (token instanceof ValueOperatorPtg || token instanceof ControlPtg) {
// Value Operator Ptgs and Control are base tokens, so token will be unchanged
// but any child nodes are processed according to desiredOperandClass and callerForceArrayFlag
for (int i = 0; i < children.length; i++) {
ParseNode child = children[i];
transformNode(child, desiredOperandClass, callerForceArrayFlag);
}
return;
}
if (token instanceof AbstractFunctionPtg) {
transformFunctionNode((AbstractFunctionPtg) token, children, desiredOperandClass,
callerForceArrayFlag);
return;
}
if (children.length > 0) {
throw new IllegalStateException("Node should not have any children");
}
if (token.isBaseToken()) {
// nothing to do
return;
}
if (callerForceArrayFlag) {
switch (desiredOperandClass) {
case Ptg.CLASS_VALUE:
case Ptg.CLASS_ARRAY:
token.setClass(Ptg.CLASS_ARRAY);
break;
case Ptg.CLASS_REF:
token.setClass(Ptg.CLASS_REF);
break;
default:
throw new IllegalStateException("Unexpected operand class ("
+ desiredOperandClass + ")");
}
} else {
token.setClass(desiredOperandClass);
}
}
private void transformFunctionNode(AbstractFunctionPtg afp, ParseNode[] children,
byte desiredOperandClass, boolean callerForceArrayFlag) {
boolean localForceArrayFlag;
byte defaultReturnOperandClass = afp.getDefaultOperandClass();
if (callerForceArrayFlag) {
switch (defaultReturnOperandClass) {
case Ptg.CLASS_REF:
if (desiredOperandClass == Ptg.CLASS_REF) {
afp.setClass(Ptg.CLASS_REF);
} else {
afp.setClass(Ptg.CLASS_ARRAY);
}
localForceArrayFlag = false;
break;
case Ptg.CLASS_ARRAY:
afp.setClass(Ptg.CLASS_ARRAY);
localForceArrayFlag = false;
break;
case Ptg.CLASS_VALUE:
afp.setClass(Ptg.CLASS_ARRAY);
localForceArrayFlag = true;
break;
default:
throw new IllegalStateException("Unexpected operand class ("
+ defaultReturnOperandClass + ")");
}
} else {
if (defaultReturnOperandClass == desiredOperandClass) {
localForceArrayFlag = false;
// an alternative would have been to for non-base Ptgs to set their operand class
// from their default, but this would require the call in many subclasses because
// the default OC is not known until the end of the constructor
afp.setClass(defaultReturnOperandClass);
} else {
switch (desiredOperandClass) {
case Ptg.CLASS_VALUE:
// always OK to set functions to return 'value'
afp.setClass(Ptg.CLASS_VALUE);
localForceArrayFlag = false;
break;
case Ptg.CLASS_ARRAY:
switch (defaultReturnOperandClass) {
case Ptg.CLASS_REF:
afp.setClass(Ptg.CLASS_REF);
break;
case Ptg.CLASS_VALUE:
afp.setClass(Ptg.CLASS_ARRAY);
break;
default:
throw new IllegalStateException("Unexpected operand class ("
+ defaultReturnOperandClass + ")");
}
localForceArrayFlag = (defaultReturnOperandClass == Ptg.CLASS_VALUE);
break;
case Ptg.CLASS_REF:
switch (defaultReturnOperandClass) {
case Ptg.CLASS_ARRAY:
afp.setClass(Ptg.CLASS_ARRAY);
break;
case Ptg.CLASS_VALUE:
afp.setClass(Ptg.CLASS_VALUE);
break;
default:
throw new IllegalStateException("Unexpected operand class ("
+ defaultReturnOperandClass + ")");
}
localForceArrayFlag = false;
break;
default:
throw new IllegalStateException("Unexpected operand class ("
+ desiredOperandClass + ")");
}
}
}
for (int i = 0; i < children.length; i++) {
ParseNode child = children[i];
byte paramOperandClass = afp.getParameterClass(i);
transformNode(child, paramOperandClass, localForceArrayFlag);
}
}
}

View File

@ -0,0 +1,201 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.model;
import org.apache.poi.hssf.record.formula.AttrPtg;
import org.apache.poi.hssf.record.formula.FuncVarPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry;
/**
* Represents a syntactic element from a formula by encapsulating the corresponding <tt>Ptg</tt>
* token. Each <tt>ParseNode</tt> may have child <tt>ParseNode</tt>s in the case when the wrapped
* <tt>Ptg</tt> is non-atomic.
*
* @author Josh Micich
*/
final class ParseNode {
public static final ParseNode[] EMPTY_ARRAY = { };
private final Ptg _token;
private final ParseNode[] _children;
private boolean _isIf;
private final int _tokenCount;
public ParseNode(Ptg token, ParseNode[] children) {
_token = token;
_children = children;
_isIf = isIf(token);
int tokenCount = 1;
for (int i = 0; i < children.length; i++) {
tokenCount += children[i].getTokenCount();
}
if (_isIf) {
// there will be 2 or 3 extra tAttr tokens according to whether the false param is present
tokenCount += children.length;
}
_tokenCount = tokenCount;
}
public ParseNode(Ptg token) {
this(token, EMPTY_ARRAY);
}
public ParseNode(Ptg token, ParseNode child0) {
this(token, new ParseNode[] { child0, });
}
public ParseNode(Ptg token, ParseNode child0, ParseNode child1) {
this(token, new ParseNode[] { child0, child1, });
}
private int getTokenCount() {
return _tokenCount;
}
/**
* Collects the array of <tt>Ptg</tt> tokens for the specified tree.
*/
public static Ptg[] toTokenArray(ParseNode rootNode) {
TokenCollector temp = new TokenCollector(rootNode.getTokenCount());
rootNode.collectPtgs(temp);
return temp.getResult();
}
private void collectPtgs(TokenCollector temp) {
if (isIf(getToken())) {
collectIfPtgs(temp);
return;
}
for (int i=0; i< getChildren().length; i++) {
getChildren()[i].collectPtgs(temp);
}
temp.add(getToken());
}
/**
* The IF() function gets marked up with two or three tAttr tokens.
* Similar logic will be required for CHOOSE() when it is supported
*
* See excelfileformat.pdf sec 3.10.5 "tAttr (19H)
*/
private void collectIfPtgs(TokenCollector temp) {
// condition goes first
getChildren()[0].collectPtgs(temp);
// placeholder for tAttrIf
int ifAttrIndex = temp.createPlaceholder();
// true parameter
getChildren()[1].collectPtgs(temp);
// placeholder for first skip attr
int skipAfterTrueParamIndex = temp.createPlaceholder();
int trueParamSize = temp.sumTokenSizes(ifAttrIndex+1, skipAfterTrueParamIndex);
AttrPtg attrIf = new AttrPtg();
attrIf.setOptimizedIf(true);
AttrPtg attrSkipAfterTrue = new AttrPtg();
attrSkipAfterTrue.setGoto(true);
if (getChildren().length > 2) {
// false param present
// false parameter
getChildren()[2].collectPtgs(temp);
int skipAfterFalseParamIndex = temp.createPlaceholder();
AttrPtg attrSkipAfterFalse = new AttrPtg();
attrSkipAfterFalse.setGoto(true);
int falseParamSize = temp.sumTokenSizes(skipAfterTrueParamIndex+1, skipAfterFalseParamIndex);
attrIf.setData((short)(trueParamSize + 4)); // distance to start of false parameter. +4 for skip after true
attrSkipAfterTrue.setData((short)(falseParamSize + 4 + 4 - 1)); // 1 less than distance to end of if FuncVar(size=4). +4 for attr skip before
attrSkipAfterFalse.setData((short)(4 - 1)); // 1 less than distance to end of if FuncVar(size=4).
temp.setPlaceholder(ifAttrIndex, attrIf);
temp.setPlaceholder(skipAfterTrueParamIndex, attrSkipAfterTrue);
temp.setPlaceholder(skipAfterFalseParamIndex, attrSkipAfterFalse);
} else {
// false parameter not present
attrIf.setData((short)(trueParamSize + 4)); // distance to start of FuncVar. +4 for skip after true
attrSkipAfterTrue.setData((short)(4 - 1)); // 1 less than distance to end of if FuncVar(size=4).
temp.setPlaceholder(ifAttrIndex, attrIf);
temp.setPlaceholder(skipAfterTrueParamIndex, attrSkipAfterTrue);
}
temp.add(getToken());
}
private static boolean isIf(Ptg token) {
if (token instanceof FuncVarPtg) {
FuncVarPtg func = (FuncVarPtg) token;
if (FunctionMetadataRegistry.FUNCTION_NAME_IF.equals(func.getName())) {
return true;
}
}
return false;
}
public Ptg getToken() {
return _token;
}
public ParseNode[] getChildren() {
return _children;
}
private static final class TokenCollector {
private final Ptg[] _ptgs;
private int _offset;
public TokenCollector(int tokenCount) {
_ptgs = new Ptg[tokenCount];
_offset = 0;
}
public int sumTokenSizes(int fromIx, int toIx) {
int result = 0;
for (int i=fromIx; i<toIx; i++) {
result += _ptgs[i].getSize();
}
return result;
}
public int createPlaceholder() {
return _offset++;
}
public void add(Ptg token) {
if (token == null) {
throw new IllegalArgumentException("token must not be null");
}
_ptgs[_offset] = token;
_offset++;
}
public void setPlaceholder(int index, Ptg token) {
if (_ptgs[index] != null) {
throw new IllegalStateException("Invalid placeholder index (" + index + ")");
}
_ptgs[index] = token;
}
public Ptg[] getResult() {
return _ptgs;
}
}
}

View File

@ -28,12 +28,11 @@ package org.apache.poi.hssf.record.formula;
* tEndSheet
*/
public abstract class ControlPtg extends Ptg {
public boolean isBaseToken() {
return true;
}
public final byte getDefaultOperandClass() {
// TODO throw new IllegalStateException("Control tokens are not classified");
return Ptg.CLASS_VALUE;
}
throw new IllegalStateException("Control tokens are not classified");
}
}

View File

@ -341,7 +341,9 @@ public abstract class Ptg
ptgClass = thePtgClass;
}
/** returns the class (REF/VALUE/ARRAY) for this Ptg */
/**
* @return the 'operand class' (REF/VALUE/ARRAY) for this Ptg
*/
public byte getPtgClass() {
return ptgClass;
}

View File

@ -15,6 +15,7 @@
# Created by (org.apache.poi.hssf.record.formula.function.ExcelFileFormatDocFunctionExtractor)
# from source file 'excelfileformat.odt' (size=356107, md5=0x8f789cb6e75594caf068f8e193004ef4)
# ! + some manual edits !
#
#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )
@ -78,8 +79,8 @@
58 NPER 3 5 V V V V V V
59 PMT 3 5 V V V V V V
60 RATE 3 6 V V V V V V V
61 MIRR 3 3 V R V V
62 IRR 1 2 V R V
61 MIRR 3 3 V A V V
62 IRR 1 2 V A V
63 RAND 0 0 V - x
64 MATCH 2 3 V V R R
65 DATE 3 3 V V V V
@ -93,8 +94,8 @@
73 SECOND 1 1 V V
74 NOW 0 0 V - x
75 AREAS 1 1 V R
76 ROWS 1 1 V R
77 COLUMNS 1 1 V R
76 ROWS 1 1 V A
77 COLUMNS 1 1 V A
78 OFFSET 3 5 R R V V V V x
82 SEARCH 2 3 V V V V
83 TRANSPOSE 1 1 A A

Binary file not shown.

View File

@ -34,6 +34,8 @@ public final class AllModelTests {
result.addTestSuite(TestFormulaParser.class);
result.addTestSuite(TestFormulaParserEval.class);
result.addTestSuite(TestFormulaParserIf.class);
result.addTestSuite(TestOperandClassTransformer.class);
result.addTestSuite(TestRVA.class);
result.addTestSuite(TestSheet.class);
result.addTestSuite(TestSheetAdditional.class);
return result;

View File

@ -0,0 +1,110 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.model;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
import org.apache.poi.hssf.record.formula.FuncVarPtg;
import org.apache.poi.hssf.record.formula.Ptg;
/**
* Tests specific formula examples in <tt>OperandClassTransformer</tt>.
*
* @author Josh Micich
*/
public final class TestOperandClassTransformer extends TestCase {
public void testMdeterm() {
String formula = "MDETERM(ABS(A1))";
Ptg[] ptgs = FormulaParser.parse(formula, null);
confirmTokenClass(ptgs, 0, Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 1, "ABS", Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 2, "MDETERM", Ptg.CLASS_VALUE);
}
/**
* In the example: <code>INDEX(PI(),1)</code>, Excel encodes PI() as 'array'. It is not clear
* what rule justifies this. POI currently encodes it as 'value' which Excel(2007) seems to
* tolerate. Changing the metadata for INDEX to have first parameter as 'array' class breaks
* other formulas involving INDEX. It seems like a special case needs to be made. Perhaps an
* important observation is that INDEX is one of very few functions that returns 'reference' type.
*
* This test has been added but disabled in order to document this issue.
*/
public void DISABLED_testIndexPi1() {
String formula = "INDEX(PI(),1)";
Ptg[] ptgs = FormulaParser.parse(formula, null);
confirmFuncClass(ptgs, 1, "PI", Ptg.CLASS_ARRAY); // fails as of POI 3.1
confirmFuncClass(ptgs, 2, "INDEX", Ptg.CLASS_VALUE);
}
public void testComplexIRR_bug45041() {
String formula = "(1+IRR(SUMIF(A:A,ROW(INDIRECT(MIN(A:A)&\":\"&MAX(A:A))),B:B),0))^365-1";
Ptg[] ptgs = FormulaParser.parse(formula, null);
FuncVarPtg rowFunc = (FuncVarPtg) ptgs[10];
FuncVarPtg sumifFunc = (FuncVarPtg) ptgs[12];
assertEquals("ROW", rowFunc.getName());
assertEquals("SUMIF", sumifFunc.getName());
if (rowFunc.getPtgClass() == Ptg.CLASS_VALUE || sumifFunc.getPtgClass() == Ptg.CLASS_VALUE) {
throw new AssertionFailedError("Identified bug 45041");
}
confirmTokenClass(ptgs, 1, Ptg.CLASS_REF);
confirmTokenClass(ptgs, 2, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 3, "MIN", Ptg.CLASS_VALUE);
confirmTokenClass(ptgs, 6, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 7, "MAX", Ptg.CLASS_VALUE);
confirmFuncClass(ptgs, 9, "INDIRECT", Ptg.CLASS_REF);
confirmFuncClass(ptgs, 10, "ROW", Ptg.CLASS_ARRAY);
confirmTokenClass(ptgs, 11, Ptg.CLASS_REF);
confirmFuncClass(ptgs, 12, "SUMIF", Ptg.CLASS_ARRAY);
confirmFuncClass(ptgs, 14, "IRR", Ptg.CLASS_VALUE);
}
private void confirmFuncClass(Ptg[] ptgs, int i, String expectedFunctionName, byte operandClass) {
confirmTokenClass(ptgs, i, operandClass);
AbstractFunctionPtg afp = (AbstractFunctionPtg) ptgs[i];
assertEquals(expectedFunctionName, afp.getName());
}
private void confirmTokenClass(Ptg[] ptgs, int i, byte operandClass) {
Ptg ptg = ptgs[i];
if (operandClass != ptg.getPtgClass()) {
throw new AssertionFailedError("Wrong operand class for function ptg ("
+ ptg.toString() + "). Expected " + getOperandClassName(operandClass)
+ " but got " + getOperandClassName(ptg.getPtgClass()));
}
}
private static String getOperandClassName(byte ptgClass) {
switch (ptgClass) {
case Ptg.CLASS_REF:
return "R";
case Ptg.CLASS_VALUE:
return "V";
case Ptg.CLASS_ARRAY:
return "A";
}
throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
}
}

View File

@ -0,0 +1,156 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.model;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.formula.AttrPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.record.formula.ReferencePtg;
import org.apache.poi.hssf.usermodel.FormulaExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
* Tests 'operand class' transformation performed by
* <tt>OperandClassTransformer</tt> by comparing its results with those
* directly produced by Excel (in a sample spreadsheet).
*
* @author Josh Micich
*/
public final class TestRVA extends TestCase {
private static final String NEW_LINE = System.getProperty("line.separator");
public void testFormulas() {
HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("testRVA.xls");
HSSFSheet sheet = wb.getSheetAt(0);
int countFailures = 0;
int countErrors = 0;
int rowIx = 0;
while (rowIx < 65535) {
HSSFRow row = sheet.getRow(rowIx);
if (row == null) {
break;
}
HSSFCell cell = row.getCell(0);
if (cell == null || cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) {
break;
}
String formula = cell.getCellFormula();
try {
confirmCell(cell, formula);
} catch (AssertionFailedError e) {
System.err.println("Problem with row[" + rowIx + "] formula '" + formula + "'");
System.err.println(e.getMessage());
countFailures++;
} catch (RuntimeException e) {
System.err.println("Problem with row[" + rowIx + "] formula '" + formula + "'");
countErrors++;
e.printStackTrace();
}
rowIx++;
}
if (countErrors + countFailures > 0) {
String msg = "One or more RVA tests failed: countFailures=" + countFailures
+ " countFailures=" + countErrors + ". See stderr for details.";
throw new AssertionFailedError(msg);
}
}
private void confirmCell(HSSFCell formulaCell, String formula) {
Ptg[] excelPtgs = FormulaExtractor.getPtgs(formulaCell);
Ptg[] poiPtgs = FormulaParser.parse(formula, null);
int nExcelTokens = excelPtgs.length;
int nPoiTokens = poiPtgs.length;
if (nExcelTokens != nPoiTokens) {
if (nExcelTokens == nPoiTokens + 1 && excelPtgs[0].getClass() == AttrPtg.class) {
// compensate for missing tAttrVolatile, which belongs in any formula
// involving OFFSET() et al. POI currently does not insert where required
Ptg[] temp = new Ptg[nExcelTokens];
temp[0] = excelPtgs[0];
System.arraycopy(poiPtgs, 0, temp, 1, nPoiTokens);
poiPtgs = temp;
} else {
throw new RuntimeException("Expected " + nExcelTokens + " tokens but got "
+ nPoiTokens);
}
}
boolean hasMismatch = false;
StringBuffer sb = new StringBuffer();
for (int i = 0; i < nExcelTokens; i++) {
Ptg poiPtg = poiPtgs[i];
Ptg excelPtg = excelPtgs[i];
if (!areTokenClassesSame(poiPtg, excelPtg)) {
hasMismatch = true;
sb.append(" mismatch token type[" + i + "] " + getShortClassName(excelPtg) + " "
+ getOperandClassName(excelPtg) + " - " + getShortClassName(poiPtg) + " "
+ getOperandClassName(poiPtg));
sb.append(NEW_LINE);
continue;
}
if (poiPtg.isBaseToken()) {
continue;
}
sb.append(" token[" + i + "] " + excelPtg.toString() + " "
+ getOperandClassName(excelPtg));
if (excelPtg.getPtgClass() != poiPtg.getPtgClass()) {
hasMismatch = true;
sb.append(" - was " + getOperandClassName(poiPtg));
}
sb.append(NEW_LINE);
}
if (hasMismatch) {
throw new AssertionFailedError(sb.toString());
}
}
private boolean areTokenClassesSame(Ptg poiPtg, Ptg excelPtg) {
if (excelPtg.getClass() == poiPtg.getClass()) {
return true;
}
if (poiPtg.getClass() == ReferencePtg.class) {
// TODO - remove funny subclasses of ReferencePtg
return excelPtg instanceof ReferencePtg;
}
return false;
}
private String getShortClassName(Object o) {
String cn = o.getClass().getName();
int pos = cn.lastIndexOf('.');
return cn.substring(pos + 1);
}
private static String getOperandClassName(Ptg ptg) {
byte ptgClass = ptg.getPtgClass();
switch (ptgClass) {
case Ptg.CLASS_REF: return "R";
case Ptg.CLASS_VALUE: return "V";
case Ptg.CLASS_ARRAY: return "A";
}
throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
}
}

View File

@ -0,0 +1,49 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.usermodel;
import java.util.List;
import org.apache.poi.hssf.record.CellValueRecordInterface;
import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate;
import org.apache.poi.hssf.record.formula.Ptg;
/**
* Test utility class to get <tt>Ptg</tt> arrays out of formula cells
*
* @author Josh Micich
*/
public final class FormulaExtractor {
private FormulaExtractor() {
// no instances of this class
}
public static Ptg[] getPtgs(HSSFCell cell) {
CellValueRecordInterface vr = cell.getCellValueRecord();
if (!(vr instanceof FormulaRecordAggregate)) {
throw new IllegalArgumentException("Not a formula cell");
}
FormulaRecordAggregate fra = (FormulaRecordAggregate) vr;
List tokens = fra.getFormulaRecord().getParsedExpression();
Ptg[] result = new Ptg[tokens.size()];
tokens.toArray(result);
return result;
}
}