diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 688410c89..831149c3b 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45060 - Improved token class transformation during formula parsing 44840 - Improved handling of HSSFObjectData, especially for entries with data held not in POIFS 45043 - Support for getting excel cell comments when extracting text Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 9b81f2a01..d9052130c 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45060 - Improved token class transformation during formula parsing 44840 - Improved handling of HSSFObjectData, especially for entries with data held not in POIFS 45043 - Support for getting excel cell comments when extracting text Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level diff --git a/src/java/org/apache/poi/hssf/model/FormulaParser.java b/src/java/org/apache/poi/hssf/model/FormulaParser.java index 82f54f0ac..508061c03 100644 --- a/src/java/org/apache/poi/hssf/model/FormulaParser.java +++ b/src/java/org/apache/poi/hssf/model/FormulaParser.java @@ -18,7 +18,6 @@ package org.apache.poi.hssf.model; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.Stack; import java.util.regex.Pattern; @@ -61,17 +60,17 @@ public final class FormulaParser { } } - public static int FORMULA_TYPE_CELL = 0; - public static int FORMULA_TYPE_SHARED = 1; - public static int FORMULA_TYPE_ARRAY =2; - public static int FORMULA_TYPE_CONDFOMRAT = 3; - public static int FORMULA_TYPE_NAMEDRANGE = 4; + public static final int FORMULA_TYPE_CELL = 0; + public static final int FORMULA_TYPE_SHARED = 1; + public static final int FORMULA_TYPE_ARRAY =2; + public static final int FORMULA_TYPE_CONDFOMRAT = 3; + public static final int FORMULA_TYPE_NAMEDRANGE = 4; private final String formulaString; private final int formulaLength; private int pointer; - private final List tokens = new Stack(); + private ParseNode _rootNode; /** * Used for spotting if we have a cell reference, @@ -221,14 +220,15 @@ public final class FormulaParser { return value.length() == 0 ? null : value.toString(); } - /** Parse and Translate a String Identifier */ - private Ptg parseIdent() { - String name; - name = GetName(); + private ParseNode parseFunctionOrIdentifier() { + String name = GetName(); if (look == '('){ //This is a function return function(name); } + return new ParseNode(parseIdentifier(name)); + } + private Ptg parseIdentifier(String name) { if (look == ':' || look == '.') { // this is a AreaReference GetChar(); @@ -287,14 +287,6 @@ public final class FormulaParser { + name + "\", but that named range wasn't defined!"); } - /** - * Adds a pointer to the last token to the latest function argument list. - * @param obj - */ - private void addArgumentPointer(List argumentPointers) { - argumentPointers.add(tokens.get(tokens.size()-1)); - } - /** * Note - Excel function names are 'case aware but not case sensitive'. This method may end * up creating a defined name record in the workbook if the specified name is not an internal @@ -302,58 +294,23 @@ public final class FormulaParser { * * @param name case preserved function name (as it was entered/appeared in the formula). */ - private Ptg function(String name) { - int numArgs =0 ; + private ParseNode function(String name) { + NamePtg nameToken = null; // Note regarding parameter - if(!AbstractFunctionPtg.isInternalFunctionName(name)) { // external functions get a Name token which points to a defined name record - NamePtg nameToken = new NamePtg(name, this.book); + nameToken = new NamePtg(name, this.book); // in the token tree, the name is more or less the first argument - numArgs++; - tokens.add(nameToken); } - //average 2 args per function - List argumentPointers = new ArrayList(2); Match('('); - numArgs += Arguments(argumentPointers); + ParseNode[] args = Arguments(); Match(')'); - return getFunction(name, numArgs, argumentPointers); + return getFunction(name, nameToken, args); } - /** - * Adds the size of all the ptgs after the provided index (inclusive). - *

- * Initially used to count a goto - * @param index - * @return int - */ - private int getPtgSize(int index) { - int count = 0; - - Iterator ptgIterator = tokens.listIterator(index); - while (ptgIterator.hasNext()) { - Ptg ptg = (Ptg)ptgIterator.next(); - count+=ptg.getSize(); - } - - return count; - } - - private int getPtgSize(int start, int end) { - int count = 0; - int index = start; - Iterator ptgIterator = tokens.listIterator(index); - while (ptgIterator.hasNext() && index <= end) { - Ptg ptg = (Ptg)ptgIterator.next(); - count+=ptg.getSize(); - index++; - } - - return count; - } /** * Generates the variable function ptg for the formula. *

@@ -362,84 +319,35 @@ public final class FormulaParser { * @param numArgs * @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this function */ - private AbstractFunctionPtg getFunction(String name, int numArgs, List argumentPointers) { + private ParseNode getFunction(String name, NamePtg namePtg, ParseNode[] args) { - boolean isVarArgs; - int funcIx; FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByName(name.toUpperCase()); + int numArgs = args.length; if(fm == null) { + if (namePtg == null) { + throw new IllegalStateException("NamePtg must be supplied for external functions"); + } // must be external function - isVarArgs = true; - funcIx = FunctionMetadataRegistry.FUNCTION_INDEX_EXTERNAL; - } else { - isVarArgs = !fm.hasFixedArgsLength(); - funcIx = fm.getIndex(); - validateNumArgs(numArgs, fm); + ParseNode[] allArgs = new ParseNode[numArgs+1]; + allArgs[0] = new ParseNode(namePtg); + System.arraycopy(args, 0, allArgs, 1, numArgs); + return new ParseNode(new FuncVarPtg(name, (byte)(numArgs+1)), allArgs); } + + if (namePtg != null) { + throw new IllegalStateException("NamePtg no applicable to internal functions"); + } + boolean isVarArgs = !fm.hasFixedArgsLength(); + int funcIx = fm.getIndex(); + validateNumArgs(args.length, fm); + AbstractFunctionPtg retval; if(isVarArgs) { retval = new FuncVarPtg(name, (byte)numArgs); } else { retval = new FuncPtg(funcIx); } - if (!name.equalsIgnoreCase(AbstractFunctionPtg.FUNCTION_NAME_IF)) { - // early return for everything else besides IF() - return retval; - } - - - AttrPtg ifPtg = new AttrPtg(); - ifPtg.setData((short)7); //mirroring excel output - ifPtg.setOptimizedIf(true); - - if (argumentPointers.size() != 2 && argumentPointers.size() != 3) { - throw new IllegalArgumentException("["+argumentPointers.size()+"] Arguments Found - An IF formula requires 2 or 3 arguments. IF(CONDITION, TRUE_VALUE, FALSE_VALUE [OPTIONAL]"); - } - - //Biffview of an IF formula record indicates the attr ptg goes after the condition ptgs and are - //tracked in the argument pointers - //The beginning first argument pointer is the last ptg of the condition - int ifIndex = tokens.indexOf(argumentPointers.get(0))+1; - tokens.add(ifIndex, ifPtg); - - //we now need a goto ptgAttr to skip to the end of the formula after a true condition - //the true condition is should be inserted after the last ptg in the first argument - - int gotoIndex = tokens.indexOf(argumentPointers.get(1))+1; - - AttrPtg goto1Ptg = new AttrPtg(); - goto1Ptg.setGoto(true); - - - tokens.add(gotoIndex, goto1Ptg); - - - if (numArgs > 2) { //only add false jump if there is a false condition - - //second goto to skip past the function ptg - AttrPtg goto2Ptg = new AttrPtg(); - goto2Ptg.setGoto(true); - goto2Ptg.setData((short)(retval.getSize()-1)); - //Page 472 of the Microsoft Excel Developer's kit states that: - //The b(or w) field specifies the number byes (or words to skip, minus 1 - - tokens.add(goto2Ptg); //this goes after all the arguments are defined - } - - //data portion of the if ptg points to the false subexpression (Page 472 of MS Excel Developer's kit) - //count the number of bytes after the ifPtg to the False Subexpression - //doesn't specify -1 in the documentation - ifPtg.setData((short)(getPtgSize(ifIndex+1, gotoIndex))); - - //count all the additional (goto) ptgs but dont count itself - int ptgCount = this.getPtgSize(gotoIndex)-goto1Ptg.getSize()+retval.getSize(); - if (ptgCount > Short.MAX_VALUE) { - throw new RuntimeException("Ptg Size exceeds short when being specified for a goto ptg in an if"); - } - - goto1Ptg.setData((short)(ptgCount-1)); - - return retval; + return new ParseNode(retval, args); } private void validateNumArgs(int numArgs, FunctionMetadata fm) { @@ -470,10 +378,12 @@ public final class FormulaParser { } /** get arguments to a function */ - private int Arguments(List argumentPointers) { + private ParseNode[] Arguments() { + //average 2 args per function + List temp = new ArrayList(2); SkipWhite(); if(look == ')') { - return 0; + return ParseNode.EMPTY_ARRAY; } boolean missedPrevArg = true; @@ -482,8 +392,7 @@ public final class FormulaParser { SkipWhite(); if (isArgumentDelimiter(look)) { if (missedPrevArg) { - tokens.add(new MissingArgPtg()); - addArgumentPointer(argumentPointers); + temp.add(new ParseNode(new MissingArgPtg())); numArgs++; } if (look == ')') { @@ -493,8 +402,7 @@ public final class FormulaParser { missedPrevArg = true; continue; } - comparisonExpression(); - addArgumentPointer(argumentPointers); + temp.add(comparisonExpression()); numArgs++; missedPrevArg = false; SkipWhite(); @@ -502,32 +410,34 @@ public final class FormulaParser { throw expected("',' or ')'"); } } - return numArgs; + ParseNode[] result = new ParseNode[temp.size()]; + temp.toArray(result); + return result; } /** Parse and Translate a Math Factor */ - private void powerFactor() { - percentFactor(); + private ParseNode powerFactor() { + ParseNode result = percentFactor(); while(true) { SkipWhite(); if(look != '^') { - return; + return result; } Match('^'); - percentFactor(); - tokens.add(new PowerPtg()); + ParseNode other = percentFactor(); + result = new ParseNode(new PowerPtg(), result, other); } } - private void percentFactor() { - tokens.add(parseSimpleFactor()); + private ParseNode percentFactor() { + ParseNode result = parseSimpleFactor(); while(true) { SkipWhite(); if(look != '%') { - return; + return result; } Match('%'); - tokens.add(new PercentPtg()); + result = new ParseNode(new PercentPtg(), result); } } @@ -535,32 +445,30 @@ public final class FormulaParser { /** * factors (without ^ or % ) */ - private Ptg parseSimpleFactor() { + private ParseNode parseSimpleFactor() { SkipWhite(); switch(look) { case '#': - return parseErrorLiteral(); + return new ParseNode(parseErrorLiteral()); case '-': Match('-'); - powerFactor(); - return new UnaryMinusPtg(); + return new ParseNode(new UnaryMinusPtg(), powerFactor()); case '+': Match('+'); - powerFactor(); - return new UnaryPlusPtg(); + return new ParseNode(new UnaryPlusPtg(), powerFactor()); case '(': Match('('); - comparisonExpression(); + ParseNode inside = comparisonExpression(); Match(')'); - return new ParenthesisPtg(); + return new ParseNode(new ParenthesisPtg(), inside); case '"': - return parseStringLiteral(); + return new ParseNode(parseStringLiteral()); } if (IsAlpha(look) || look == '\''){ - return parseIdent(); + return parseFunctionOrIdentifier(); } // else - assume number - return parseNumber(); + return new ParseNode(parseNumber()); } @@ -716,28 +624,30 @@ public final class FormulaParser { } /** Parse and Translate a Math Term */ - private void Term() { - powerFactor(); + private ParseNode Term() { + ParseNode result = powerFactor(); while(true) { SkipWhite(); + Ptg operator; switch(look) { case '*': Match('*'); - powerFactor(); - tokens.add(new MultiplyPtg()); - continue; + operator = new MultiplyPtg(); + break; case '/': Match('/'); - powerFactor(); - tokens.add(new DividePtg()); - continue; + operator = new DividePtg(); + break; + default: + return result; // finished with Term } - return; // finished with Term + ParseNode other = powerFactor(); + result = new ParseNode(operator, result, other); } } - private void comparisonExpression() { - concatExpression(); + private ParseNode comparisonExpression() { + ParseNode result = concatExpression(); while (true) { SkipWhite(); switch(look) { @@ -745,11 +655,11 @@ public final class FormulaParser { case '>': case '<': Ptg comparisonToken = getComparisonToken(); - concatExpression(); - tokens.add(comparisonToken); + ParseNode other = concatExpression(); + result = new ParseNode(comparisonToken, result, other); continue; } - return; // finished with predicate expression + return result; // finished with predicate expression } } @@ -779,38 +689,41 @@ public final class FormulaParser { } - private void concatExpression() { - additiveExpression(); + private ParseNode concatExpression() { + ParseNode result = additiveExpression(); while (true) { SkipWhite(); if(look != '&') { break; // finished with concat expression } Match('&'); - additiveExpression(); - tokens.add(new ConcatPtg()); + ParseNode other = additiveExpression(); + result = new ParseNode(new ConcatPtg(), result, other); } + return result; } /** Parse and Translate an Expression */ - private void additiveExpression() { - Term(); + private ParseNode additiveExpression() { + ParseNode result = Term(); while (true) { SkipWhite(); + Ptg operator; switch(look) { case '+': Match('+'); - Term(); - tokens.add(new AddPtg()); - continue; + operator = new AddPtg(); + break; case '-': Match('-'); - Term(); - tokens.add(new SubtractPtg()); - continue; + operator = new SubtractPtg(); + break; + default: + return result; // finished with additive expression } - return; // finished with additive expression + ParseNode other = Term(); + result = new ParseNode(operator, result, other); } } @@ -835,7 +748,7 @@ end; public void parse() { pointer=0; GetChar(); - comparisonExpression(); + _rootNode = comparisonExpression(); if(pointer <= formulaLength) { String msg = "Unused input [" + formulaString.substring(pointer-1) @@ -858,91 +771,12 @@ end; } public Ptg[] getRPNPtg(int formulaType) { - Node node = createTree(); + OperandClassTransformer oct = new OperandClassTransformer(formulaType); // RVA is for 'operand class': 'reference', 'value', 'array' - setRootLevelRVA(node, formulaType); - setParameterRVA(node,formulaType); - return (Ptg[]) tokens.toArray(new Ptg[0]); + oct.transformFormula(_rootNode); + return ParseNode.toTokenArray(_rootNode); } - private void setRootLevelRVA(Node n, int formulaType) { - //Pg 16, excelfileformat.pdf @ openoffice.org - Ptg p = n.getValue(); - if (formulaType == FormulaParser.FORMULA_TYPE_NAMEDRANGE) { - if (p.getDefaultOperandClass() == Ptg.CLASS_REF) { - setClass(n,Ptg.CLASS_REF); - } else { - setClass(n,Ptg.CLASS_ARRAY); - } - } else { - setClass(n,Ptg.CLASS_VALUE); - } - - } - - private void setParameterRVA(Node n, int formulaType) { - Ptg p = n.getValue(); - int numOperands = n.getNumChildren(); - if (p instanceof AbstractFunctionPtg) { - for (int i =0;i + *

  • reference
  • + *
  • value
  • + *
  • array
  • + * + *

    + * + * The final operand class chosen for each token depends on the formula type and the token's place + * in the formula. If POI gets the operand class wrong, Excel may interpret the formula + * incorrectly. This condition is typically manifested as a formula cell that displays as '#VALUE!', + * but resolves correctly when the user presses F2, enter.

    + * + * The logic implemented here was partially inspired by the description in + * "OpenOffice.org's Documentation of the Microsoft Excel File Format". The model presented there + * seems to be inconsistent with observed Excel behaviour (These differences have not been fully + * investigated). The implementation in this class has been heavily modified in order to satisfy + * concrete examples of how Excel performs the same logic (see TestRVA).

    + * + * Hopefully, as additional important test cases are identified and added to the test suite, + * patterns might become more obvious in this code and allow for simplification. + * + * @author Josh Micich + */ +final class OperandClassTransformer { + + private final int _formulaType; + + public OperandClassTransformer(int formulaType) { + _formulaType = formulaType; + } + + /** + * Traverses the supplied formula parse tree, calling Ptg.setClass() for each non-base + * token to set its operand class. + */ + public void transformFormula(ParseNode rootNode) { + byte rootNodeOperandClass; + switch (_formulaType) { + case FormulaParser.FORMULA_TYPE_CELL: + rootNodeOperandClass = Ptg.CLASS_VALUE; + break; + default: + throw new RuntimeException("Incomplete code - formula type (" + + _formulaType + ") not supported yet"); + + } + transformNode(rootNode, rootNodeOperandClass, false); + } + + private void transformNode(ParseNode node, byte desiredOperandClass, + boolean callerForceArrayFlag) { + Ptg token = node.getToken(); + ParseNode[] children = node.getChildren(); + if (token instanceof ValueOperatorPtg || token instanceof ControlPtg) { + // Value Operator Ptgs and Control are base tokens, so token will be unchanged + + // but any child nodes are processed according to desiredOperandClass and callerForceArrayFlag + for (int i = 0; i < children.length; i++) { + ParseNode child = children[i]; + transformNode(child, desiredOperandClass, callerForceArrayFlag); + } + return; + } + if (token instanceof AbstractFunctionPtg) { + transformFunctionNode((AbstractFunctionPtg) token, children, desiredOperandClass, + callerForceArrayFlag); + return; + } + if (children.length > 0) { + throw new IllegalStateException("Node should not have any children"); + } + + if (token.isBaseToken()) { + // nothing to do + return; + } + if (callerForceArrayFlag) { + switch (desiredOperandClass) { + case Ptg.CLASS_VALUE: + case Ptg.CLASS_ARRAY: + token.setClass(Ptg.CLASS_ARRAY); + break; + case Ptg.CLASS_REF: + token.setClass(Ptg.CLASS_REF); + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + desiredOperandClass + ")"); + } + } else { + token.setClass(desiredOperandClass); + } + } + + private void transformFunctionNode(AbstractFunctionPtg afp, ParseNode[] children, + byte desiredOperandClass, boolean callerForceArrayFlag) { + + boolean localForceArrayFlag; + byte defaultReturnOperandClass = afp.getDefaultOperandClass(); + + if (callerForceArrayFlag) { + switch (defaultReturnOperandClass) { + case Ptg.CLASS_REF: + if (desiredOperandClass == Ptg.CLASS_REF) { + afp.setClass(Ptg.CLASS_REF); + } else { + afp.setClass(Ptg.CLASS_ARRAY); + } + localForceArrayFlag = false; + break; + case Ptg.CLASS_ARRAY: + afp.setClass(Ptg.CLASS_ARRAY); + localForceArrayFlag = false; + break; + case Ptg.CLASS_VALUE: + afp.setClass(Ptg.CLASS_ARRAY); + localForceArrayFlag = true; + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + defaultReturnOperandClass + ")"); + } + } else { + if (defaultReturnOperandClass == desiredOperandClass) { + localForceArrayFlag = false; + // an alternative would have been to for non-base Ptgs to set their operand class + // from their default, but this would require the call in many subclasses because + // the default OC is not known until the end of the constructor + afp.setClass(defaultReturnOperandClass); + } else { + switch (desiredOperandClass) { + case Ptg.CLASS_VALUE: + // always OK to set functions to return 'value' + afp.setClass(Ptg.CLASS_VALUE); + localForceArrayFlag = false; + break; + case Ptg.CLASS_ARRAY: + switch (defaultReturnOperandClass) { + case Ptg.CLASS_REF: + afp.setClass(Ptg.CLASS_REF); + break; + case Ptg.CLASS_VALUE: + afp.setClass(Ptg.CLASS_ARRAY); + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + defaultReturnOperandClass + ")"); + } + localForceArrayFlag = (defaultReturnOperandClass == Ptg.CLASS_VALUE); + break; + case Ptg.CLASS_REF: + switch (defaultReturnOperandClass) { + case Ptg.CLASS_ARRAY: + afp.setClass(Ptg.CLASS_ARRAY); + break; + case Ptg.CLASS_VALUE: + afp.setClass(Ptg.CLASS_VALUE); + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + defaultReturnOperandClass + ")"); + } + localForceArrayFlag = false; + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + desiredOperandClass + ")"); + } + + } + } + + for (int i = 0; i < children.length; i++) { + ParseNode child = children[i]; + byte paramOperandClass = afp.getParameterClass(i); + transformNode(child, paramOperandClass, localForceArrayFlag); + } + } +} diff --git a/src/java/org/apache/poi/hssf/model/ParseNode.java b/src/java/org/apache/poi/hssf/model/ParseNode.java new file mode 100644 index 000000000..acd8cb12b --- /dev/null +++ b/src/java/org/apache/poi/hssf/model/ParseNode.java @@ -0,0 +1,201 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hssf.model; + +import org.apache.poi.hssf.record.formula.AttrPtg; +import org.apache.poi.hssf.record.formula.FuncVarPtg; +import org.apache.poi.hssf.record.formula.Ptg; +import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry; +/** + * Represents a syntactic element from a formula by encapsulating the corresponding Ptg + * token. Each ParseNode may have child ParseNodes in the case when the wrapped + * Ptg is non-atomic. + * + * @author Josh Micich + */ +final class ParseNode { + + public static final ParseNode[] EMPTY_ARRAY = { }; + private final Ptg _token; + private final ParseNode[] _children; + private boolean _isIf; + private final int _tokenCount; + + public ParseNode(Ptg token, ParseNode[] children) { + _token = token; + _children = children; + _isIf = isIf(token); + int tokenCount = 1; + for (int i = 0; i < children.length; i++) { + tokenCount += children[i].getTokenCount(); + } + if (_isIf) { + // there will be 2 or 3 extra tAttr tokens according to whether the false param is present + tokenCount += children.length; + } + _tokenCount = tokenCount; + } + public ParseNode(Ptg token) { + this(token, EMPTY_ARRAY); + } + public ParseNode(Ptg token, ParseNode child0) { + this(token, new ParseNode[] { child0, }); + } + public ParseNode(Ptg token, ParseNode child0, ParseNode child1) { + this(token, new ParseNode[] { child0, child1, }); + } + private int getTokenCount() { + return _tokenCount; + } + + /** + * Collects the array of Ptg tokens for the specified tree. + */ + public static Ptg[] toTokenArray(ParseNode rootNode) { + TokenCollector temp = new TokenCollector(rootNode.getTokenCount()); + rootNode.collectPtgs(temp); + return temp.getResult(); + } + private void collectPtgs(TokenCollector temp) { + if (isIf(getToken())) { + collectIfPtgs(temp); + return; + } + for (int i=0; i< getChildren().length; i++) { + getChildren()[i].collectPtgs(temp); + } + temp.add(getToken()); + } + /** + * The IF() function gets marked up with two or three tAttr tokens. + * Similar logic will be required for CHOOSE() when it is supported + * + * See excelfileformat.pdf sec 3.10.5 "tAttr (19H) + */ + private void collectIfPtgs(TokenCollector temp) { + + // condition goes first + getChildren()[0].collectPtgs(temp); + + // placeholder for tAttrIf + int ifAttrIndex = temp.createPlaceholder(); + + // true parameter + getChildren()[1].collectPtgs(temp); + + // placeholder for first skip attr + int skipAfterTrueParamIndex = temp.createPlaceholder(); + int trueParamSize = temp.sumTokenSizes(ifAttrIndex+1, skipAfterTrueParamIndex); + + AttrPtg attrIf = new AttrPtg(); + attrIf.setOptimizedIf(true); + AttrPtg attrSkipAfterTrue = new AttrPtg(); + attrSkipAfterTrue.setGoto(true); + + if (getChildren().length > 2) { + // false param present + + // false parameter + getChildren()[2].collectPtgs(temp); + + int skipAfterFalseParamIndex = temp.createPlaceholder(); + + AttrPtg attrSkipAfterFalse = new AttrPtg(); + attrSkipAfterFalse.setGoto(true); + + int falseParamSize = temp.sumTokenSizes(skipAfterTrueParamIndex+1, skipAfterFalseParamIndex); + + attrIf.setData((short)(trueParamSize + 4)); // distance to start of false parameter. +4 for skip after true + attrSkipAfterTrue.setData((short)(falseParamSize + 4 + 4 - 1)); // 1 less than distance to end of if FuncVar(size=4). +4 for attr skip before + attrSkipAfterFalse.setData((short)(4 - 1)); // 1 less than distance to end of if FuncVar(size=4). + + temp.setPlaceholder(ifAttrIndex, attrIf); + temp.setPlaceholder(skipAfterTrueParamIndex, attrSkipAfterTrue); + temp.setPlaceholder(skipAfterFalseParamIndex, attrSkipAfterFalse); + } else { + // false parameter not present + attrIf.setData((short)(trueParamSize + 4)); // distance to start of FuncVar. +4 for skip after true + attrSkipAfterTrue.setData((short)(4 - 1)); // 1 less than distance to end of if FuncVar(size=4). + + temp.setPlaceholder(ifAttrIndex, attrIf); + temp.setPlaceholder(skipAfterTrueParamIndex, attrSkipAfterTrue); + } + + temp.add(getToken()); + } + + private static boolean isIf(Ptg token) { + if (token instanceof FuncVarPtg) { + FuncVarPtg func = (FuncVarPtg) token; + if (FunctionMetadataRegistry.FUNCTION_NAME_IF.equals(func.getName())) { + return true; + } + } + return false; + } + + public Ptg getToken() { + return _token; + } + + public ParseNode[] getChildren() { + return _children; + } + + private static final class TokenCollector { + + private final Ptg[] _ptgs; + private int _offset; + + public TokenCollector(int tokenCount) { + _ptgs = new Ptg[tokenCount]; + _offset = 0; + } + + public int sumTokenSizes(int fromIx, int toIx) { + int result = 0; + for (int i=fromIx; iOperandClassTransformer. + * + * @author Josh Micich + */ +public final class TestOperandClassTransformer extends TestCase { + + public void testMdeterm() { + String formula = "MDETERM(ABS(A1))"; + Ptg[] ptgs = FormulaParser.parse(formula, null); + + confirmTokenClass(ptgs, 0, Ptg.CLASS_ARRAY); + confirmFuncClass(ptgs, 1, "ABS", Ptg.CLASS_ARRAY); + confirmFuncClass(ptgs, 2, "MDETERM", Ptg.CLASS_VALUE); + } + + /** + * In the example: INDEX(PI(),1), Excel encodes PI() as 'array'. It is not clear + * what rule justifies this. POI currently encodes it as 'value' which Excel(2007) seems to + * tolerate. Changing the metadata for INDEX to have first parameter as 'array' class breaks + * other formulas involving INDEX. It seems like a special case needs to be made. Perhaps an + * important observation is that INDEX is one of very few functions that returns 'reference' type. + * + * This test has been added but disabled in order to document this issue. + */ + public void DISABLED_testIndexPi1() { + String formula = "INDEX(PI(),1)"; + Ptg[] ptgs = FormulaParser.parse(formula, null); + + confirmFuncClass(ptgs, 1, "PI", Ptg.CLASS_ARRAY); // fails as of POI 3.1 + confirmFuncClass(ptgs, 2, "INDEX", Ptg.CLASS_VALUE); + } + + public void testComplexIRR_bug45041() { + String formula = "(1+IRR(SUMIF(A:A,ROW(INDIRECT(MIN(A:A)&\":\"&MAX(A:A))),B:B),0))^365-1"; + Ptg[] ptgs = FormulaParser.parse(formula, null); + + FuncVarPtg rowFunc = (FuncVarPtg) ptgs[10]; + FuncVarPtg sumifFunc = (FuncVarPtg) ptgs[12]; + assertEquals("ROW", rowFunc.getName()); + assertEquals("SUMIF", sumifFunc.getName()); + + if (rowFunc.getPtgClass() == Ptg.CLASS_VALUE || sumifFunc.getPtgClass() == Ptg.CLASS_VALUE) { + throw new AssertionFailedError("Identified bug 45041"); + } + confirmTokenClass(ptgs, 1, Ptg.CLASS_REF); + confirmTokenClass(ptgs, 2, Ptg.CLASS_REF); + confirmFuncClass(ptgs, 3, "MIN", Ptg.CLASS_VALUE); + confirmTokenClass(ptgs, 6, Ptg.CLASS_REF); + confirmFuncClass(ptgs, 7, "MAX", Ptg.CLASS_VALUE); + confirmFuncClass(ptgs, 9, "INDIRECT", Ptg.CLASS_REF); + confirmFuncClass(ptgs, 10, "ROW", Ptg.CLASS_ARRAY); + confirmTokenClass(ptgs, 11, Ptg.CLASS_REF); + confirmFuncClass(ptgs, 12, "SUMIF", Ptg.CLASS_ARRAY); + confirmFuncClass(ptgs, 14, "IRR", Ptg.CLASS_VALUE); + } + + private void confirmFuncClass(Ptg[] ptgs, int i, String expectedFunctionName, byte operandClass) { + confirmTokenClass(ptgs, i, operandClass); + AbstractFunctionPtg afp = (AbstractFunctionPtg) ptgs[i]; + assertEquals(expectedFunctionName, afp.getName()); + } + + private void confirmTokenClass(Ptg[] ptgs, int i, byte operandClass) { + Ptg ptg = ptgs[i]; + if (operandClass != ptg.getPtgClass()) { + throw new AssertionFailedError("Wrong operand class for function ptg (" + + ptg.toString() + "). Expected " + getOperandClassName(operandClass) + + " but got " + getOperandClassName(ptg.getPtgClass())); + } + } + + private static String getOperandClassName(byte ptgClass) { + switch (ptgClass) { + case Ptg.CLASS_REF: + return "R"; + case Ptg.CLASS_VALUE: + return "V"; + case Ptg.CLASS_ARRAY: + return "A"; + } + throw new RuntimeException("Unknown operand class (" + ptgClass + ")"); + } +} diff --git a/src/testcases/org/apache/poi/hssf/model/TestRVA.java b/src/testcases/org/apache/poi/hssf/model/TestRVA.java new file mode 100644 index 000000000..cb51c17bd --- /dev/null +++ b/src/testcases/org/apache/poi/hssf/model/TestRVA.java @@ -0,0 +1,156 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hssf.model; + +import junit.framework.AssertionFailedError; +import junit.framework.TestCase; + +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.hssf.record.formula.AttrPtg; +import org.apache.poi.hssf.record.formula.Ptg; +import org.apache.poi.hssf.record.formula.ReferencePtg; +import org.apache.poi.hssf.usermodel.FormulaExtractor; +import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFRow; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; + +/** + * Tests 'operand class' transformation performed by + * OperandClassTransformer by comparing its results with those + * directly produced by Excel (in a sample spreadsheet). + * + * @author Josh Micich + */ +public final class TestRVA extends TestCase { + + private static final String NEW_LINE = System.getProperty("line.separator"); + + public void testFormulas() { + HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("testRVA.xls"); + HSSFSheet sheet = wb.getSheetAt(0); + + int countFailures = 0; + int countErrors = 0; + + int rowIx = 0; + while (rowIx < 65535) { + HSSFRow row = sheet.getRow(rowIx); + if (row == null) { + break; + } + HSSFCell cell = row.getCell(0); + if (cell == null || cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) { + break; + } + String formula = cell.getCellFormula(); + try { + confirmCell(cell, formula); + } catch (AssertionFailedError e) { + System.err.println("Problem with row[" + rowIx + "] formula '" + formula + "'"); + System.err.println(e.getMessage()); + countFailures++; + } catch (RuntimeException e) { + System.err.println("Problem with row[" + rowIx + "] formula '" + formula + "'"); + countErrors++; + e.printStackTrace(); + } + rowIx++; + } + if (countErrors + countFailures > 0) { + String msg = "One or more RVA tests failed: countFailures=" + countFailures + + " countFailures=" + countErrors + ". See stderr for details."; + throw new AssertionFailedError(msg); + } + } + + private void confirmCell(HSSFCell formulaCell, String formula) { + Ptg[] excelPtgs = FormulaExtractor.getPtgs(formulaCell); + Ptg[] poiPtgs = FormulaParser.parse(formula, null); + int nExcelTokens = excelPtgs.length; + int nPoiTokens = poiPtgs.length; + if (nExcelTokens != nPoiTokens) { + if (nExcelTokens == nPoiTokens + 1 && excelPtgs[0].getClass() == AttrPtg.class) { + // compensate for missing tAttrVolatile, which belongs in any formula + // involving OFFSET() et al. POI currently does not insert where required + Ptg[] temp = new Ptg[nExcelTokens]; + temp[0] = excelPtgs[0]; + System.arraycopy(poiPtgs, 0, temp, 1, nPoiTokens); + poiPtgs = temp; + } else { + throw new RuntimeException("Expected " + nExcelTokens + " tokens but got " + + nPoiTokens); + } + } + boolean hasMismatch = false; + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < nExcelTokens; i++) { + Ptg poiPtg = poiPtgs[i]; + Ptg excelPtg = excelPtgs[i]; + if (!areTokenClassesSame(poiPtg, excelPtg)) { + hasMismatch = true; + sb.append(" mismatch token type[" + i + "] " + getShortClassName(excelPtg) + " " + + getOperandClassName(excelPtg) + " - " + getShortClassName(poiPtg) + " " + + getOperandClassName(poiPtg)); + sb.append(NEW_LINE); + continue; + } + if (poiPtg.isBaseToken()) { + continue; + } + sb.append(" token[" + i + "] " + excelPtg.toString() + " " + + getOperandClassName(excelPtg)); + + if (excelPtg.getPtgClass() != poiPtg.getPtgClass()) { + hasMismatch = true; + sb.append(" - was " + getOperandClassName(poiPtg)); + } + sb.append(NEW_LINE); + } + if (hasMismatch) { + throw new AssertionFailedError(sb.toString()); + } + } + + private boolean areTokenClassesSame(Ptg poiPtg, Ptg excelPtg) { + if (excelPtg.getClass() == poiPtg.getClass()) { + return true; + } + if (poiPtg.getClass() == ReferencePtg.class) { + // TODO - remove funny subclasses of ReferencePtg + return excelPtg instanceof ReferencePtg; + } + return false; + } + + private String getShortClassName(Object o) { + String cn = o.getClass().getName(); + int pos = cn.lastIndexOf('.'); + return cn.substring(pos + 1); + } + + private static String getOperandClassName(Ptg ptg) { + byte ptgClass = ptg.getPtgClass(); + switch (ptgClass) { + case Ptg.CLASS_REF: return "R"; + case Ptg.CLASS_VALUE: return "V"; + case Ptg.CLASS_ARRAY: return "A"; + } + throw new RuntimeException("Unknown operand class (" + ptgClass + ")"); + } +} diff --git a/src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java b/src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java new file mode 100644 index 000000000..d657647ea --- /dev/null +++ b/src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java @@ -0,0 +1,49 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hssf.usermodel; + +import java.util.List; + +import org.apache.poi.hssf.record.CellValueRecordInterface; +import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate; +import org.apache.poi.hssf.record.formula.Ptg; + +/** + * Test utility class to get Ptg arrays out of formula cells + * + * @author Josh Micich + */ +public final class FormulaExtractor { + + private FormulaExtractor() { + // no instances of this class + } + + public static Ptg[] getPtgs(HSSFCell cell) { + CellValueRecordInterface vr = cell.getCellValueRecord(); + if (!(vr instanceof FormulaRecordAggregate)) { + throw new IllegalArgumentException("Not a formula cell"); + } + FormulaRecordAggregate fra = (FormulaRecordAggregate) vr; + List tokens = fra.getFormulaRecord().getParsedExpression(); + Ptg[] result = new Ptg[tokens.size()]; + tokens.toArray(result); + return result; + } + +}