From a811443b4b3f915fbe22accaa433ea06a40e4654 Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Wed, 28 May 2008 06:19:31 +0000 Subject: [PATCH] Fix for 45060 (and 45041) - Improved token class transformation during formula parsing git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@660828 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../apache/poi/hssf/model/FormulaParser.java | 415 ++++-------------- .../hssf/model/OperandClassTransformer.java | 206 +++++++++ .../org/apache/poi/hssf/model/ParseNode.java | 201 +++++++++ .../poi/hssf/record/formula/ControlPtg.java | 7 +- .../apache/poi/hssf/record/formula/Ptg.java | 4 +- .../formula/function/functionMetadata.txt | 9 +- .../org/apache/poi/hssf/data/testRVA.xls | Bin 0 -> 32768 bytes .../apache/poi/hssf/model/AllModelTests.java | 2 + .../model/TestOperandClassTransformer.java | 110 +++++ .../org/apache/poi/hssf/model/TestRVA.java | 156 +++++++ .../poi/hssf/usermodel/FormulaExtractor.java | 49 +++ 13 files changed, 834 insertions(+), 327 deletions(-) create mode 100644 src/java/org/apache/poi/hssf/model/OperandClassTransformer.java create mode 100644 src/java/org/apache/poi/hssf/model/ParseNode.java create mode 100644 src/testcases/org/apache/poi/hssf/data/testRVA.xls create mode 100644 src/testcases/org/apache/poi/hssf/model/TestOperandClassTransformer.java create mode 100644 src/testcases/org/apache/poi/hssf/model/TestRVA.java create mode 100644 src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 688410c89..831149c3b 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45060 - Improved token class transformation during formula parsing 44840 - Improved handling of HSSFObjectData, especially for entries with data held not in 
POIFS 45043 - Support for getting excel cell comments when extracting text Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 9b81f2a01..d9052130c 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45060 - Improved token class transformation during formula parsing 44840 - Improved handling of HSSFObjectData, especially for entries with data held not in POIFS 45043 - Support for getting excel cell comments when extracting text Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level diff --git a/src/java/org/apache/poi/hssf/model/FormulaParser.java b/src/java/org/apache/poi/hssf/model/FormulaParser.java index 82f54f0ac..508061c03 100644 --- a/src/java/org/apache/poi/hssf/model/FormulaParser.java +++ b/src/java/org/apache/poi/hssf/model/FormulaParser.java @@ -18,7 +18,6 @@ package org.apache.poi.hssf.model; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.Stack; import java.util.regex.Pattern; @@ -61,17 +60,17 @@ public final class FormulaParser { } } - public static int FORMULA_TYPE_CELL = 0; - public static int FORMULA_TYPE_SHARED = 1; - public static int FORMULA_TYPE_ARRAY =2; - public static int FORMULA_TYPE_CONDFOMRAT = 3; - public static int FORMULA_TYPE_NAMEDRANGE = 4; + public static final int FORMULA_TYPE_CELL = 0; + public static final int FORMULA_TYPE_SHARED = 1; + public static final int FORMULA_TYPE_ARRAY =2; + public static final int FORMULA_TYPE_CONDFOMRAT = 3; + public static final int FORMULA_TYPE_NAMEDRANGE = 4; private final String formulaString; private final int formulaLength; private int pointer; - private final List tokens = new 
Stack(); + private ParseNode _rootNode; /** * Used for spotting if we have a cell reference, @@ -221,14 +220,15 @@ public final class FormulaParser { return value.length() == 0 ? null : value.toString(); } - /** Parse and Translate a String Identifier */ - private Ptg parseIdent() { - String name; - name = GetName(); + private ParseNode parseFunctionOrIdentifier() { + String name = GetName(); if (look == '('){ //This is a function return function(name); } + return new ParseNode(parseIdentifier(name)); + } + private Ptg parseIdentifier(String name) { if (look == ':' || look == '.') { // this is a AreaReference GetChar(); @@ -287,14 +287,6 @@ public final class FormulaParser { + name + "\", but that named range wasn't defined!"); } - /** - * Adds a pointer to the last token to the latest function argument list. - * @param obj - */ - private void addArgumentPointer(List argumentPointers) { - argumentPointers.add(tokens.get(tokens.size()-1)); - } - /** * Note - Excel function names are 'case aware but not case sensitive'. This method may end * up creating a defined name record in the workbook if the specified name is not an internal @@ -302,58 +294,23 @@ public final class FormulaParser { * * @param name case preserved function name (as it was entered/appeared in the formula). 
*/ - private Ptg function(String name) { - int numArgs =0 ; + private ParseNode function(String name) { + NamePtg nameToken = null; // Note regarding parameter - if(!AbstractFunctionPtg.isInternalFunctionName(name)) { // external functions get a Name token which points to a defined name record - NamePtg nameToken = new NamePtg(name, this.book); + nameToken = new NamePtg(name, this.book); // in the token tree, the name is more or less the first argument - numArgs++; - tokens.add(nameToken); } - //average 2 args per function - List argumentPointers = new ArrayList(2); Match('('); - numArgs += Arguments(argumentPointers); + ParseNode[] args = Arguments(); Match(')'); - return getFunction(name, numArgs, argumentPointers); + return getFunction(name, nameToken, args); } - /** - * Adds the size of all the ptgs after the provided index (inclusive). - *

- * Initially used to count a goto - * @param index - * @return int - */ - private int getPtgSize(int index) { - int count = 0; - - Iterator ptgIterator = tokens.listIterator(index); - while (ptgIterator.hasNext()) { - Ptg ptg = (Ptg)ptgIterator.next(); - count+=ptg.getSize(); - } - - return count; - } - - private int getPtgSize(int start, int end) { - int count = 0; - int index = start; - Iterator ptgIterator = tokens.listIterator(index); - while (ptgIterator.hasNext() && index <= end) { - Ptg ptg = (Ptg)ptgIterator.next(); - count+=ptg.getSize(); - index++; - } - - return count; - } /** * Generates the variable function ptg for the formula. *

@@ -362,84 +319,35 @@ public final class FormulaParser { * @param numArgs * @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this function */ - private AbstractFunctionPtg getFunction(String name, int numArgs, List argumentPointers) { + private ParseNode getFunction(String name, NamePtg namePtg, ParseNode[] args) { - boolean isVarArgs; - int funcIx; FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByName(name.toUpperCase()); + int numArgs = args.length; if(fm == null) { + if (namePtg == null) { + throw new IllegalStateException("NamePtg must be supplied for external functions"); + } // must be external function - isVarArgs = true; - funcIx = FunctionMetadataRegistry.FUNCTION_INDEX_EXTERNAL; - } else { - isVarArgs = !fm.hasFixedArgsLength(); - funcIx = fm.getIndex(); - validateNumArgs(numArgs, fm); + ParseNode[] allArgs = new ParseNode[numArgs+1]; + allArgs[0] = new ParseNode(namePtg); + System.arraycopy(args, 0, allArgs, 1, numArgs); + return new ParseNode(new FuncVarPtg(name, (byte)(numArgs+1)), allArgs); } + + if (namePtg != null) { + throw new IllegalStateException("NamePtg no applicable to internal functions"); + } + boolean isVarArgs = !fm.hasFixedArgsLength(); + int funcIx = fm.getIndex(); + validateNumArgs(args.length, fm); + AbstractFunctionPtg retval; if(isVarArgs) { retval = new FuncVarPtg(name, (byte)numArgs); } else { retval = new FuncPtg(funcIx); } - if (!name.equalsIgnoreCase(AbstractFunctionPtg.FUNCTION_NAME_IF)) { - // early return for everything else besides IF() - return retval; - } - - - AttrPtg ifPtg = new AttrPtg(); - ifPtg.setData((short)7); //mirroring excel output - ifPtg.setOptimizedIf(true); - - if (argumentPointers.size() != 2 && argumentPointers.size() != 3) { - throw new IllegalArgumentException("["+argumentPointers.size()+"] Arguments Found - An IF formula requires 2 or 3 arguments. 
IF(CONDITION, TRUE_VALUE, FALSE_VALUE [OPTIONAL]"); - } - - //Biffview of an IF formula record indicates the attr ptg goes after the condition ptgs and are - //tracked in the argument pointers - //The beginning first argument pointer is the last ptg of the condition - int ifIndex = tokens.indexOf(argumentPointers.get(0))+1; - tokens.add(ifIndex, ifPtg); - - //we now need a goto ptgAttr to skip to the end of the formula after a true condition - //the true condition is should be inserted after the last ptg in the first argument - - int gotoIndex = tokens.indexOf(argumentPointers.get(1))+1; - - AttrPtg goto1Ptg = new AttrPtg(); - goto1Ptg.setGoto(true); - - - tokens.add(gotoIndex, goto1Ptg); - - - if (numArgs > 2) { //only add false jump if there is a false condition - - //second goto to skip past the function ptg - AttrPtg goto2Ptg = new AttrPtg(); - goto2Ptg.setGoto(true); - goto2Ptg.setData((short)(retval.getSize()-1)); - //Page 472 of the Microsoft Excel Developer's kit states that: - //The b(or w) field specifies the number byes (or words to skip, minus 1 - - tokens.add(goto2Ptg); //this goes after all the arguments are defined - } - - //data portion of the if ptg points to the false subexpression (Page 472 of MS Excel Developer's kit) - //count the number of bytes after the ifPtg to the False Subexpression - //doesn't specify -1 in the documentation - ifPtg.setData((short)(getPtgSize(ifIndex+1, gotoIndex))); - - //count all the additional (goto) ptgs but dont count itself - int ptgCount = this.getPtgSize(gotoIndex)-goto1Ptg.getSize()+retval.getSize(); - if (ptgCount > Short.MAX_VALUE) { - throw new RuntimeException("Ptg Size exceeds short when being specified for a goto ptg in an if"); - } - - goto1Ptg.setData((short)(ptgCount-1)); - - return retval; + return new ParseNode(retval, args); } private void validateNumArgs(int numArgs, FunctionMetadata fm) { @@ -470,10 +378,12 @@ public final class FormulaParser { } /** get arguments to a function */ - private int 
Arguments(List argumentPointers) { + private ParseNode[] Arguments() { + //average 2 args per function + List temp = new ArrayList(2); SkipWhite(); if(look == ')') { - return 0; + return ParseNode.EMPTY_ARRAY; } boolean missedPrevArg = true; @@ -482,8 +392,7 @@ public final class FormulaParser { SkipWhite(); if (isArgumentDelimiter(look)) { if (missedPrevArg) { - tokens.add(new MissingArgPtg()); - addArgumentPointer(argumentPointers); + temp.add(new ParseNode(new MissingArgPtg())); numArgs++; } if (look == ')') { @@ -493,8 +402,7 @@ public final class FormulaParser { missedPrevArg = true; continue; } - comparisonExpression(); - addArgumentPointer(argumentPointers); + temp.add(comparisonExpression()); numArgs++; missedPrevArg = false; SkipWhite(); @@ -502,32 +410,34 @@ public final class FormulaParser { throw expected("',' or ')'"); } } - return numArgs; + ParseNode[] result = new ParseNode[temp.size()]; + temp.toArray(result); + return result; } /** Parse and Translate a Math Factor */ - private void powerFactor() { - percentFactor(); + private ParseNode powerFactor() { + ParseNode result = percentFactor(); while(true) { SkipWhite(); if(look != '^') { - return; + return result; } Match('^'); - percentFactor(); - tokens.add(new PowerPtg()); + ParseNode other = percentFactor(); + result = new ParseNode(new PowerPtg(), result, other); } } - private void percentFactor() { - tokens.add(parseSimpleFactor()); + private ParseNode percentFactor() { + ParseNode result = parseSimpleFactor(); while(true) { SkipWhite(); if(look != '%') { - return; + return result; } Match('%'); - tokens.add(new PercentPtg()); + result = new ParseNode(new PercentPtg(), result); } } @@ -535,32 +445,30 @@ public final class FormulaParser { /** * factors (without ^ or % ) */ - private Ptg parseSimpleFactor() { + private ParseNode parseSimpleFactor() { SkipWhite(); switch(look) { case '#': - return parseErrorLiteral(); + return new ParseNode(parseErrorLiteral()); case '-': Match('-'); - 
powerFactor(); - return new UnaryMinusPtg(); + return new ParseNode(new UnaryMinusPtg(), powerFactor()); case '+': Match('+'); - powerFactor(); - return new UnaryPlusPtg(); + return new ParseNode(new UnaryPlusPtg(), powerFactor()); case '(': Match('('); - comparisonExpression(); + ParseNode inside = comparisonExpression(); Match(')'); - return new ParenthesisPtg(); + return new ParseNode(new ParenthesisPtg(), inside); case '"': - return parseStringLiteral(); + return new ParseNode(parseStringLiteral()); } if (IsAlpha(look) || look == '\''){ - return parseIdent(); + return parseFunctionOrIdentifier(); } // else - assume number - return parseNumber(); + return new ParseNode(parseNumber()); } @@ -716,28 +624,30 @@ public final class FormulaParser { } /** Parse and Translate a Math Term */ - private void Term() { - powerFactor(); + private ParseNode Term() { + ParseNode result = powerFactor(); while(true) { SkipWhite(); + Ptg operator; switch(look) { case '*': Match('*'); - powerFactor(); - tokens.add(new MultiplyPtg()); - continue; + operator = new MultiplyPtg(); + break; case '/': Match('/'); - powerFactor(); - tokens.add(new DividePtg()); - continue; + operator = new DividePtg(); + break; + default: + return result; // finished with Term } - return; // finished with Term + ParseNode other = powerFactor(); + result = new ParseNode(operator, result, other); } } - private void comparisonExpression() { - concatExpression(); + private ParseNode comparisonExpression() { + ParseNode result = concatExpression(); while (true) { SkipWhite(); switch(look) { @@ -745,11 +655,11 @@ public final class FormulaParser { case '>': case '<': Ptg comparisonToken = getComparisonToken(); - concatExpression(); - tokens.add(comparisonToken); + ParseNode other = concatExpression(); + result = new ParseNode(comparisonToken, result, other); continue; } - return; // finished with predicate expression + return result; // finished with predicate expression } } @@ -779,38 +689,41 @@ public final 
class FormulaParser { } - private void concatExpression() { - additiveExpression(); + private ParseNode concatExpression() { + ParseNode result = additiveExpression(); while (true) { SkipWhite(); if(look != '&') { break; // finished with concat expression } Match('&'); - additiveExpression(); - tokens.add(new ConcatPtg()); + ParseNode other = additiveExpression(); + result = new ParseNode(new ConcatPtg(), result, other); } + return result; } /** Parse and Translate an Expression */ - private void additiveExpression() { - Term(); + private ParseNode additiveExpression() { + ParseNode result = Term(); while (true) { SkipWhite(); + Ptg operator; switch(look) { case '+': Match('+'); - Term(); - tokens.add(new AddPtg()); - continue; + operator = new AddPtg(); + break; case '-': Match('-'); - Term(); - tokens.add(new SubtractPtg()); - continue; + operator = new SubtractPtg(); + break; + default: + return result; // finished with additive expression } - return; // finished with additive expression + ParseNode other = Term(); + result = new ParseNode(operator, result, other); } } @@ -835,7 +748,7 @@ end; public void parse() { pointer=0; GetChar(); - comparisonExpression(); + _rootNode = comparisonExpression(); if(pointer <= formulaLength) { String msg = "Unused input [" + formulaString.substring(pointer-1) @@ -858,91 +771,12 @@ end; } public Ptg[] getRPNPtg(int formulaType) { - Node node = createTree(); + OperandClassTransformer oct = new OperandClassTransformer(formulaType); // RVA is for 'operand class': 'reference', 'value', 'array' - setRootLevelRVA(node, formulaType); - setParameterRVA(node,formulaType); - return (Ptg[]) tokens.toArray(new Ptg[0]); + oct.transformFormula(_rootNode); + return ParseNode.toTokenArray(_rootNode); } - private void setRootLevelRVA(Node n, int formulaType) { - //Pg 16, excelfileformat.pdf @ openoffice.org - Ptg p = n.getValue(); - if (formulaType == FormulaParser.FORMULA_TYPE_NAMEDRANGE) { - if (p.getDefaultOperandClass() == Ptg.CLASS_REF) { 
- setClass(n,Ptg.CLASS_REF); - } else { - setClass(n,Ptg.CLASS_ARRAY); - } - } else { - setClass(n,Ptg.CLASS_VALUE); - } - - } - - private void setParameterRVA(Node n, int formulaType) { - Ptg p = n.getValue(); - int numOperands = n.getNumChildren(); - if (p instanceof AbstractFunctionPtg) { - for (int i =0;i + *

  • reference
  • + *
  • value
  • + *
  • array
  • + * + *

    + * + * The final operand class chosen for each token depends on the formula type and the token's place + * in the formula. If POI gets the operand class wrong, Excel may interpret the formula + * incorrectly. This condition is typically manifested as a formula cell that displays as '#VALUE!', + * but resolves correctly when the user presses F2, enter.

    + * + * The logic implemented here was partially inspired by the description in + * "OpenOffice.org's Documentation of the Microsoft Excel File Format". The model presented there + * seems to be inconsistent with observed Excel behaviour (These differences have not been fully + * investigated). The implementation in this class has been heavily modified in order to satisfy + * concrete examples of how Excel performs the same logic (see TestRVA).

    + * + * Hopefully, as additional important test cases are identified and added to the test suite, + * patterns might become more obvious in this code and allow for simplification. + * + * @author Josh Micich + */ +final class OperandClassTransformer { + + private final int _formulaType; + + public OperandClassTransformer(int formulaType) { + _formulaType = formulaType; + } + + /** + * Traverses the supplied formula parse tree, calling Ptg.setClass() for each non-base + * token to set its operand class. + */ + public void transformFormula(ParseNode rootNode) { + byte rootNodeOperandClass; + switch (_formulaType) { + case FormulaParser.FORMULA_TYPE_CELL: + rootNodeOperandClass = Ptg.CLASS_VALUE; + break; + default: + throw new RuntimeException("Incomplete code - formula type (" + + _formulaType + ") not supported yet"); + + } + transformNode(rootNode, rootNodeOperandClass, false); + } + + private void transformNode(ParseNode node, byte desiredOperandClass, + boolean callerForceArrayFlag) { + Ptg token = node.getToken(); + ParseNode[] children = node.getChildren(); + if (token instanceof ValueOperatorPtg || token instanceof ControlPtg) { + // Value Operator Ptgs and Control are base tokens, so token will be unchanged + + // but any child nodes are processed according to desiredOperandClass and callerForceArrayFlag + for (int i = 0; i < children.length; i++) { + ParseNode child = children[i]; + transformNode(child, desiredOperandClass, callerForceArrayFlag); + } + return; + } + if (token instanceof AbstractFunctionPtg) { + transformFunctionNode((AbstractFunctionPtg) token, children, desiredOperandClass, + callerForceArrayFlag); + return; + } + if (children.length > 0) { + throw new IllegalStateException("Node should not have any children"); + } + + if (token.isBaseToken()) { + // nothing to do + return; + } + if (callerForceArrayFlag) { + switch (desiredOperandClass) { + case Ptg.CLASS_VALUE: + case Ptg.CLASS_ARRAY: + token.setClass(Ptg.CLASS_ARRAY); + break; + 
case Ptg.CLASS_REF: + token.setClass(Ptg.CLASS_REF); + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + desiredOperandClass + ")"); + } + } else { + token.setClass(desiredOperandClass); + } + } + + private void transformFunctionNode(AbstractFunctionPtg afp, ParseNode[] children, + byte desiredOperandClass, boolean callerForceArrayFlag) { + + boolean localForceArrayFlag; + byte defaultReturnOperandClass = afp.getDefaultOperandClass(); + + if (callerForceArrayFlag) { + switch (defaultReturnOperandClass) { + case Ptg.CLASS_REF: + if (desiredOperandClass == Ptg.CLASS_REF) { + afp.setClass(Ptg.CLASS_REF); + } else { + afp.setClass(Ptg.CLASS_ARRAY); + } + localForceArrayFlag = false; + break; + case Ptg.CLASS_ARRAY: + afp.setClass(Ptg.CLASS_ARRAY); + localForceArrayFlag = false; + break; + case Ptg.CLASS_VALUE: + afp.setClass(Ptg.CLASS_ARRAY); + localForceArrayFlag = true; + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + defaultReturnOperandClass + ")"); + } + } else { + if (defaultReturnOperandClass == desiredOperandClass) { + localForceArrayFlag = false; + // an alternative would have been to for non-base Ptgs to set their operand class + // from their default, but this would require the call in many subclasses because + // the default OC is not known until the end of the constructor + afp.setClass(defaultReturnOperandClass); + } else { + switch (desiredOperandClass) { + case Ptg.CLASS_VALUE: + // always OK to set functions to return 'value' + afp.setClass(Ptg.CLASS_VALUE); + localForceArrayFlag = false; + break; + case Ptg.CLASS_ARRAY: + switch (defaultReturnOperandClass) { + case Ptg.CLASS_REF: + afp.setClass(Ptg.CLASS_REF); + break; + case Ptg.CLASS_VALUE: + afp.setClass(Ptg.CLASS_ARRAY); + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + defaultReturnOperandClass + ")"); + } + localForceArrayFlag = (defaultReturnOperandClass == Ptg.CLASS_VALUE); + break; + 
case Ptg.CLASS_REF: + switch (defaultReturnOperandClass) { + case Ptg.CLASS_ARRAY: + afp.setClass(Ptg.CLASS_ARRAY); + break; + case Ptg.CLASS_VALUE: + afp.setClass(Ptg.CLASS_VALUE); + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + defaultReturnOperandClass + ")"); + } + localForceArrayFlag = false; + break; + default: + throw new IllegalStateException("Unexpected operand class (" + + desiredOperandClass + ")"); + } + + } + } + + for (int i = 0; i < children.length; i++) { + ParseNode child = children[i]; + byte paramOperandClass = afp.getParameterClass(i); + transformNode(child, paramOperandClass, localForceArrayFlag); + } + } +} diff --git a/src/java/org/apache/poi/hssf/model/ParseNode.java b/src/java/org/apache/poi/hssf/model/ParseNode.java new file mode 100644 index 000000000..acd8cb12b --- /dev/null +++ b/src/java/org/apache/poi/hssf/model/ParseNode.java @@ -0,0 +1,201 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hssf.model; + +import org.apache.poi.hssf.record.formula.AttrPtg; +import org.apache.poi.hssf.record.formula.FuncVarPtg; +import org.apache.poi.hssf.record.formula.Ptg; +import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry; +/** + * Represents a syntactic element from a formula by encapsulating the corresponding Ptg + * token. Each ParseNode may have child ParseNodes in the case when the wrapped + * Ptg is non-atomic. + * + * @author Josh Micich + */ +final class ParseNode { + + public static final ParseNode[] EMPTY_ARRAY = { }; + private final Ptg _token; + private final ParseNode[] _children; + private boolean _isIf; + private final int _tokenCount; + + public ParseNode(Ptg token, ParseNode[] children) { + _token = token; + _children = children; + _isIf = isIf(token); + int tokenCount = 1; + for (int i = 0; i < children.length; i++) { + tokenCount += children[i].getTokenCount(); + } + if (_isIf) { + // there will be 2 or 3 extra tAttr tokens according to whether the false param is present + tokenCount += children.length; + } + _tokenCount = tokenCount; + } + public ParseNode(Ptg token) { + this(token, EMPTY_ARRAY); + } + public ParseNode(Ptg token, ParseNode child0) { + this(token, new ParseNode[] { child0, }); + } + public ParseNode(Ptg token, ParseNode child0, ParseNode child1) { + this(token, new ParseNode[] { child0, child1, }); + } + private int getTokenCount() { + return _tokenCount; + } + + /** + * Collects the array of Ptg tokens for the specified tree. 
+ */ + public static Ptg[] toTokenArray(ParseNode rootNode) { + TokenCollector temp = new TokenCollector(rootNode.getTokenCount()); + rootNode.collectPtgs(temp); + return temp.getResult(); + } + private void collectPtgs(TokenCollector temp) { + if (isIf(getToken())) { + collectIfPtgs(temp); + return; + } + for (int i=0; i< getChildren().length; i++) { + getChildren()[i].collectPtgs(temp); + } + temp.add(getToken()); + } + /** + * The IF() function gets marked up with two or three tAttr tokens. + * Similar logic will be required for CHOOSE() when it is supported + * + * See excelfileformat.pdf sec 3.10.5 "tAttr (19H) + */ + private void collectIfPtgs(TokenCollector temp) { + + // condition goes first + getChildren()[0].collectPtgs(temp); + + // placeholder for tAttrIf + int ifAttrIndex = temp.createPlaceholder(); + + // true parameter + getChildren()[1].collectPtgs(temp); + + // placeholder for first skip attr + int skipAfterTrueParamIndex = temp.createPlaceholder(); + int trueParamSize = temp.sumTokenSizes(ifAttrIndex+1, skipAfterTrueParamIndex); + + AttrPtg attrIf = new AttrPtg(); + attrIf.setOptimizedIf(true); + AttrPtg attrSkipAfterTrue = new AttrPtg(); + attrSkipAfterTrue.setGoto(true); + + if (getChildren().length > 2) { + // false param present + + // false parameter + getChildren()[2].collectPtgs(temp); + + int skipAfterFalseParamIndex = temp.createPlaceholder(); + + AttrPtg attrSkipAfterFalse = new AttrPtg(); + attrSkipAfterFalse.setGoto(true); + + int falseParamSize = temp.sumTokenSizes(skipAfterTrueParamIndex+1, skipAfterFalseParamIndex); + + attrIf.setData((short)(trueParamSize + 4)); // distance to start of false parameter. +4 for skip after true + attrSkipAfterTrue.setData((short)(falseParamSize + 4 + 4 - 1)); // 1 less than distance to end of if FuncVar(size=4). +4 for attr skip before + attrSkipAfterFalse.setData((short)(4 - 1)); // 1 less than distance to end of if FuncVar(size=4). 
+ + temp.setPlaceholder(ifAttrIndex, attrIf); + temp.setPlaceholder(skipAfterTrueParamIndex, attrSkipAfterTrue); + temp.setPlaceholder(skipAfterFalseParamIndex, attrSkipAfterFalse); + } else { + // false parameter not present + attrIf.setData((short)(trueParamSize + 4)); // distance to start of FuncVar. +4 for skip after true + attrSkipAfterTrue.setData((short)(4 - 1)); // 1 less than distance to end of if FuncVar(size=4). + + temp.setPlaceholder(ifAttrIndex, attrIf); + temp.setPlaceholder(skipAfterTrueParamIndex, attrSkipAfterTrue); + } + + temp.add(getToken()); + } + + private static boolean isIf(Ptg token) { + if (token instanceof FuncVarPtg) { + FuncVarPtg func = (FuncVarPtg) token; + if (FunctionMetadataRegistry.FUNCTION_NAME_IF.equals(func.getName())) { + return true; + } + } + return false; + } + + public Ptg getToken() { + return _token; + } + + public ParseNode[] getChildren() { + return _children; + } + + private static final class TokenCollector { + + private final Ptg[] _ptgs; + private int _offset; + + public TokenCollector(int tokenCount) { + _ptgs = new Ptg[tokenCount]; + _offset = 0; + } + + public int sumTokenSizes(int fromIx, int toIx) { + int result = 0; + for (int i=fromIx; in-0 zW@mSHiJK;kXLWaH?)=~X|M&j?{qLPSGgr@ick+pQKYP|I!n3UvVez|(G7)v;8`ux& z<4Pe)u+O~TO-xKMJB)l``MHW-ew>Q zIBTvDppF-X&=x{dto%ZF7DMx=EdNy6pM8NDQ$57Es=$rTdTjWHam8)pPaCpNVP}Eu z5_sRRxLWL#et5OYEaH2L2pN>9`<_CeXMyP39g5Nr>mYfL7!sSH!&QaAT{M)>@@OA$ zoy>6%NB*#Nu0(7Vdm(?1NXfR>fmYZn>x4O@q7VB6IruQU2J+btN06uk^MW?&`?N)* z^(J2T1#F+Qa2!J_L!!QukjjzHKspnt0trI^F&SwJ(p02rNYjyKAk9RoLLx7w+1Q_h zbT-mlqMp}YYgOos8inI)=7O4)Y9_bvU2Bb!$BvKR7a-NF=r1O#1Agx8BPN%nti4w6M9@Zm$FfDt4;{0(XMug6KMAZgvj1zPC zg)M}3*DumyGrT`7MqpEZ3g4>Uf&5aiAEv`$Gc5V)sQB;rJCIK)Y%8Z~E4Kcfa@h`a z{A3JZEMXq&NVk6;v;CiLGtQd+|FMr|s5)%e_nmCNLyuD;dtf)3%{%n{?|lDA{p`xa zcgDoB+TT3NcKtIX_9;BS8HxSKrAX{Ab|KNv`jIM;Mv>UB+=#?J5BRz~n z<$nanWuHR-<)(%f<&qbV$&;J+cP@kwPha1@gS17zUalQFY+Nz9+QUKtDQphZAU+gSZUB| 
z;k(e?mx`~+vqXfAn2Z>cB!sb}JX99BFU-9%JI@G#sicb-)Q4v?qqezH_7X$s;XNX4 z2CsNha)#cIf;BWFLQSya;eTrB4b)6A`n(f*mgX(i<`|NcRtHnD9H;W5l0IfPVHY7;tmV1+py$S6qH`WlO zw5=#>P*E$1Ct*$d>=;k0v-(%t>LFX+L!i$sFDgi>&TH&Z8+$asHl%c8AGWa%2iS&` zZtNpA_7R-Pt#eqA(vAI;jr~-BZAj_HK5Anh4X_O<6}C8Q9J3`H3$O+$-P%22W1k4H z1}WXxZ`jyx1XzQVZtRmb_DP&k=loiCSdh|<{icomW`H$F>Bc@~W1kAJ1}PP`IBPs_ zOL#uO8l-e<_kxXmA;21>bYp*DV}B4}4N|(XFWT4_aYmi!F$|BAGs`y~X?yqjnzqK;%^fv~Wm`MCYZ?=YL>zo$NEPz81alna z@-qoq#lb6rR3blRpew<-5=Rc@Bps4D+rXS5VPelMxubjihD#~GF(FmRFPoAkK@z2B z^vGyKoj4zpjQ<7Z&e*N5SV3x9g?U4Ac4pJYjaxTtqtIk+GQpDa$yXNB@El`0LSY@3Z zlEdzfEn8}6y4r?>EPEW^+BS-t@`OX$9wh-l-HeFH!B|5fjShuaF3!aF~#Nz7J)r-5@ zE|mv~MD2#v8(7xFUo|zasB2_dl`pW|N>~1tpl+BVvJ92jSGub@r{_>t=4ndqm*S{Y z*0Q2dJd~g7y5#6f)h{LU6Gh^6MKb<`I16}KY<DUiiC=5# z(j%+c#u&+I74Gqvb;Eks@sC$e$@qJLzaD!V_VuLu&GbOe;I8zDNk2DJBj%FS@NjDX zl6uqZ9vn4C_711b>vpF`&EdWMY4fVRBcpx2eIOs*-8WKiZr;>kGw2(Y@M}~3d(%sR z>Fe2TDiI^;^d2Av@zIy?OB?E%QTAlrvplLcwGCNACDLA+I%5ajX6V7n z7nGe97bTs369orVpFgzfv!Q-vWApM=%_~;2H})*KK7KYySlb_Zj+kTTAI}^r*=22sBfrkhOX!0cO`NX%cNUew4(N+=7iHTDy$I( zC2E~%DW~50n7RkksCv!BY~=yusQU6LAvG>&T*Y!;B1<(TG$ikgY}?Y-y>;`Zts80z zFIfUy7_&KwpqKEQeTy17oyu~tWb}`vAvOAA33%^^IYV-W5M8N5bJzhsb3l3uM-Kj4 zIct?ZOY4wld#GEDD4Ujdv8KvIx0OF6?=HDz1=*k8np+2LVQqDN?9@%?Wz$e+Ov6Fv z;ZQ`5a<>ldNhA29-?^?YJ-o}ja*+Fj12`Prjcy@r+LktZ_YU-o_6-g&9xz_e!zhc9 z{0zCLa#ZY4`KtHgS#^%z3gz_#ML`tHLY%VUZI%riaJ)?Fej9AaHfd|dp%6yK>o78A zbNBNyXG>B#DJ?SX`@T@2v7B?Y$@qQHf}_uj7Hqf)Xz4APvV&@8YRoWAmQ4sXW-vmb z>aF`nM$;Jd{bfUZVeT=5>iaV@mU<-MzWz~jn;czbW-j%t&@<1zihPBm4|C>^d5VMH z^UnN{{v@udoHKx=Rebc!5a-*;XKLscqu)B0eHD6(ygJk8Ggw2;^%84@72jM~eLjPI zmyLZ_09K#Z*t?;r()pkB=-hy7NK(4X`-+XdCZx}0>wFMcQo6CICQEJ{CvufH9x+Jh z!Tt@TDV^gH%{N;v>zR~pxp$*>HCESSHujnj%d5|4>PpvhJfiC{7gnFwSRwAU<=&gq zMykB}dyr5npTDGGaG0G5<)><$~d zBLGWEH+G+m-4}o*r5nq;I!fn*0a#MHu@BnV2LrIAbYsVD?05i{ly2;EHukvyEGgaC zlQ#Bb0G5<)Yzz%k)m6;M?~|l-V@(@t24G3)#;&xnD+92kbYnN$*v$c0Qo6CbZS3v< zEGgaCF&jG;fF-3HdzX#9D*#JMH}*3&_A>!kQo6BE+t{ZAu%vWjPuSQK0a#MHvA?#l 
zzYf5X(v7Xe421H@%20lvB&8c$V`FOqu%vWjTWxG>0G5<)?4>sL(f}+e-Pj=;I~0H= zr5pPJ8~cF(EGgaCPukc|24G3)#y)Oi9}mEi(v5w_#y%5(C8ZntnvH!e082_YmKQh5 zuS7V%Uy;&{t+KII0a#MHu?;r1AplECH}*msdtm^Uly2+}8@nR_OG-C(pN-uYfF-3H zd(g%n48W4ojeXF@J{W){r5ihLW5)xqq;zASv$4+wU`gr5p0u$i1F)oYV`Dh9`rk-? zpCqNisyk10c*>Q>xvrx0HITY4%3MV`pSM^5eA}e=i=PYTa??+U)L@@EDpm26lqPpP zU(|wERqB^BoIFNYe!4OL-W2<%I6fK zaD-PxNPH%84BtZh5b1K<&qB*E|Bzd|)FCCGEF6dMHP|!6KN*~L#FTp92#vpUaPaCQ zAgmNGW0K=U#S~86hG$8|@nAkwpDMpUE!liUey@?YvQ`>HD>u(sDQ#|uOGB-a?nCms zT7I92Z$o@hVqTCrjc6z@0ud7j87D#_S!%4qL;C%ANPj0DU+%+=(?2eXvOrCz1yC56 zZj{qqIZMk21M^daFm)SLW0Uhz)EWjJ=FcZ35=g;EDDv~*Lm1^!4;VeCZP(`2l>V_i zkmCFR3PZHyE$wcf((bW*FlBimr~UHP7&VRO)MIXhx90IsZuP?I1+@znG+dE5@A4Xb zaCu^GWIlrK9M}1GZ%OZZkDI6(caZSLhsM=6xcT6itd>C4YA= zshUG%OG`#*GbX?BIE5`#<88SbEJbIKC#)9)el=mjPI?U)-{8Ld6tVKf( zeysWodFuJ|R(|9Y?)ta$Kb0f@Xpa0xbL4+KNB*aCcTTz!&+vRq((}fdtj{}od%r&K>+?k9i!#3iF+}G78U%9F{+Y;;Uz;O;sc$}S z8~e2*zVqO=ukvT!K9%|3lW%a~PQKEI5A8Ve0ay7Pl3V#Ec5(O3lkbURygm0zK3?d_ z=VgiHW7nH%7kDQ@^WTA8yxpnz^N}Sn!)q{ZR{Y_^kbe2(FZ1b0+-!O) z-@)g`*fepYrjx|$#H&Z9K6D^fCl0vsB_rGro+d9~IlyJ-jBah|ndoss*#I>LE&{;u zc&Hc&ytXU~z=9VAU{_icfU&A5Kxwf68l@-+3>!rO*xri*Fc1_4V1y_Nz|c_?fH9>g z0E0?iK%1vMFxrH3duv2Clyom$PK_{4^FYkYgrTVbCUnExOc=)sU_#f-&4j_N048+T z+)Nnp3SdGP&dr43s{kf+>)cEjO9Po4@62x+mV%0yN2Q9dvw! 
zFv)KnKJi$9Ci$&{>p}+;9Ju=u)pn9i`K`m10y3K9WRl-HoZ2jaNq*~~2QP$4e(RWp zG0AV8`GqmbZyj7}2Ny@p2-#Rrm z6ONaTi0FiC9LBc86BFP1mx+vZKH}-T5}+uRofm1}JRtm;yz#~xHj`3|@y$2iJYiZ} z%qOqRkq6##i~HmqhK$cQ1EiG9}IrFqq%(HaDWT%_7YjJY2GH6xpsQlDhb0XlQ&EusT&1`H+Q**yFQ<=r$2OSO za)c3@)FqX^!_53a9&>rYOeR|LGifS>iMPA)+I2aYWE>%@I0s@io;J1u0K*<2a!{?y zpY8g5?mg2UTUxI5ukeZEcW1f&scVm31M;P{j2C1D3ElW7#td7jY*$+8Vsa#%!<8m} zdc?uR8yUDrPbM;CU3+SOoR~OnGtuGdvBw_EiVV&?9T}JhDRR@1!I_tBGtB(DY}Vp$ zGrt(>3{AQkoq1X*XK2z&IrFl^6_+u}cxhHmWdqK40laQ)9EffAxHT6@;)CHi<^fpm z-e&D=>bmR5)?}@%iR{17h3WQ^V}hZ@22fxDWo<1M&{iy0U9-*1oG1pZ(ES{Kd@jit+KZWqHInJ?(hs@$nb@#hm38 zdTC(Y* zpL4FSXc$Amvd=FEn7vWaY<}QDx3o}gN zKVGJbJWQQFBgDtQe3?JfMH!~>7BAC;hpE#;gc#d(pFh*Z8K&?JFVi{?Q>Q-&anU1B z`7>RTVG2+1GHvuQby{DD|NhuUKc+D(_=%BTdurFA*ebW>joDbFtQ{AsyX>y91jBhw zf^c-iea@6vk)#sOdElM}79{DxE7yGY@lfm*H^a~jtZ1kn-s0Mq{a~G0YqeC4B*>J5 zdo6$$NKyb_yXHw=nQ>Vs=TXdc#Fx=Cl!rqVczOjFitBc#d#*OurThC2#cuZKHFF{k zy>??T*n?GZP3)(kT|bJQ(TX!9a!}}Oq*^OpKSXV<)RERo9ciu9(P6w&x@;^waVTcG zRVfKWh*}(%OVM&8ZS7bTv^5^uD3*%P(3;}i=n39qW9r5tz%2G)A`>xS_TtGoF8w)x z0`IpG^<$+#)O!%66XjV5x2)-Sbr*MTHtZYfPYt9-2Z#5Y+tT|+55+EUvkI-4sKkob z`|xBVZ9gFG&y|Sxqt3TMoqclk!4QVq%Drf8%e~4_HcUudFgOTDJ>Vk`!KTJAPhC?=WghZCe>dOpWFfJVn_ zO{pR0bTC3Wj8}}Cks~?=hW5gh_IkKXLw#`B<{|j6ct~NXC%=Bcc=G`4yO>F zJb~SqgTUU6+G0tE;gbwZ>QaSeSKc0g>HkDZ_I0YqGMe zXJ8lH?Dn>b?F}oTqz}b*x_N}+Xm8{(3Z?jN0j~k2Dwr$MA(8VunLw?MbVyW3IwY#2 z!=Xud?hkhzHtiji7QfCTbUIjXl68=6b7z;q2@LSHL{m;2ond9P2B)wHRcvMzR2$J&KODPMeAqF`auB7PjL$YO*(poA6)NwSdtE3ygDKZtUxixi6W2-)%22Qc zU5Q?J6rr|R?7;KDLc9;9TEo#1JVUJ(drz!N+~bg`+*VCh-H+$$?{}Od?sm8_-#mE~ zE5qP7GE+YI!!ukfeLjCm(Z2O&twYNxKaevW!*Z*~uzU7T4e^#ROL)VHX^Ie7T52oS zn^H@u$n_Kwo=kO0E>nWlyCgjqLu1-TUIwk_K? 
zV0A9Jj6wp>WpF7gCY7;~T5x$6DbT^ZQLdPR^{%WPh(`(J7m>@bBE+)Br3%3E;0jc) z<0UzxqPjP8^%!|HFK{&(;9cwYU}E(5S>pu-RqJ|L3(F<&FoD+3eIkMJNZ0xe+cs?J zs%dNAibdHHSO?3y00kT?vm`|ki?cwid!Z70Q54)2a-RuY!UT${DrsxvDsQZHJl&8{ z%e`Jljpbj;M2>SAn`y5=N1Dk**NjE{aARIqmpbKLpbyUGCeW%utBkX`7rMi92l-a3 zgUe*rBun6#3oSsi)@$3*jok}-b)X(j)jCcXro5B&TV#c7@5ZWrRvR4Z!@?!9Ds&q{ z9MGP|)#X$R#5l@6Sbu63rMv$Ba^13NERVe=puMyl9<|C^4Q>M0TQTuXv;=fG;#&>) z*At*ab$De@Yz7htt!;+&8xpr8nBVR=!xJ1Fs+XrKT`y0a zflif!kPy0<3b{WS-(1idce-aGk0-xYaL z>MYEbPb%^8n_qp zvj*atYWX3w^`}LX-2S~q|6(Q%Gd)Li*zr-|d`<$$Z9C#v3=69>I%6y6;;%v;Q^GYJ zjGx#r*T9XBxv1w!Vj&Dwo}1It(|n@>qi*PhFu8&C0`=A}epIPabP)*||T&#yAJ27@qs}YJ~ zKa6uMFrBC!sCvaFmbmHI;mi)*$j1LMswz)Nm zt5Y_R3Q<+Je%o*7m$l*m+az*oH%euqYP_(mkfbbWM6yzKw~TWNWGA;+tNNX;7?kiw zl{wrFMaYVCITgYL9a-VA2OKF@oia>#Sv5-=NK1rK_oy$6A*wAUxrbpDWQUQj{&1#5 z)@W49qJosgTGX=6=5!d2+~vk?D3&`a73F+i1tOyYmpZ_v9DRAgajLTu;I{iEo^StD z$jg5Uk2&yjI0=e#YSaD}CBCgoV)=%UikifhIj|;*3-B%aQs3tr8sMq9R}0wVQ7?2bzgml@}4 zE}1aIkqHOSS`B&M(N;qjV-h+csFAADb$Etkz<5Xo9D{2H%Q0<}DTih-ms*6zou05W zd^RgqxeiNqrsT=EMxI;`K=VvUg}f!3%70#m05NV@eA~g1jM(%mo#bBCbz>Xn`JACY1EC!5yIO1{-v#=vw6ddr^9`2SVm@c90KcbjKX8|;Hvjv?m zp&1KGcr`5OW;8=GVy6=ord*S@751b=Z4BAGF}=|qS}R4*ln8c7QM^x1?c0&K={lR? za2#8#)iSCinlDsDMZa9lp8licsWM+I6^H4eJN}1KYq-_kVT77#h}AJi4*tncwQ&dH zPfP(Kt&!H9;mZt57}FM-aP8%&Ep)bH?PJgFX5BN%J5=qee&vj^6EW#G#Sg=CjZSQJM;OnE}73VtJuqY zj^o*HnDQTQ`u0$?2wEVq3nVrBA|A0;D&(R;Mn2729g+-O3ka$vLrYe%$NJF!*@S}a*}ARQGSl@r|+;{Nx2;Hs8|&8_5z z7>Bue1<}8$2k4L#6Xw@?%vOxc9W2l^&VUU1JYua=m8}`FX`+R{O49qNwpr?$%B~zx z+55CcLb4`N6#s*ZZfmg~-_eQ#Y%~3eNw%5F^fUE4sq>{$8TN%#jqRmEE-6ostd-}Y zumszTEL*f8!=)b)~@Ba3H2y(=QHGEfyh zNu(>pKj2o+yTymFzgn!t!@6IH@+ztoiQ8}I3-LkBD1MNhZEtVRKN}LmjP$JdIb2)? 
h>m6~uZtHf_m9d%PN(Tm)0epNeyk1R4N?_4n{{ws3hUfqQ literal 0 HcmV?d00001 diff --git a/src/testcases/org/apache/poi/hssf/model/AllModelTests.java b/src/testcases/org/apache/poi/hssf/model/AllModelTests.java index 15f9b1b40..045e371a2 100755 --- a/src/testcases/org/apache/poi/hssf/model/AllModelTests.java +++ b/src/testcases/org/apache/poi/hssf/model/AllModelTests.java @@ -34,6 +34,8 @@ public final class AllModelTests { result.addTestSuite(TestFormulaParser.class); result.addTestSuite(TestFormulaParserEval.class); result.addTestSuite(TestFormulaParserIf.class); + result.addTestSuite(TestOperandClassTransformer.class); + result.addTestSuite(TestRVA.class); result.addTestSuite(TestSheet.class); result.addTestSuite(TestSheetAdditional.class); return result; diff --git a/src/testcases/org/apache/poi/hssf/model/TestOperandClassTransformer.java b/src/testcases/org/apache/poi/hssf/model/TestOperandClassTransformer.java new file mode 100644 index 000000000..90a5d8b13 --- /dev/null +++ b/src/testcases/org/apache/poi/hssf/model/TestOperandClassTransformer.java @@ -0,0 +1,110 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */
+
+package org.apache.poi.hssf.model;
+
+import junit.framework.AssertionFailedError;
+import junit.framework.TestCase;
+
+import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
+import org.apache.poi.hssf.record.formula.FuncVarPtg;
+import org.apache.poi.hssf.record.formula.Ptg;
+
+/**
+ * Tests specific formula examples in OperandClassTransformer: each formula is parsed with
+ * FormulaParser and the operand class carried by selected tokens is checked.
+ *
+ * @author Josh Micich
+ */
+public final class TestOperandClassTransformer extends TestCase {
+
+	/** Expects 'array' class for token 0 and for ABS, and 'value' class for MDETERM. */
+	public void testMdeterm() {
+		Ptg[] tokens = FormulaParser.parse("MDETERM(ABS(A1))", null);
+		confirmTokenClass(tokens, 0, Ptg.CLASS_ARRAY);
+		confirmFuncClass(tokens, 1, "ABS", Ptg.CLASS_ARRAY);
+		confirmFuncClass(tokens, 2, "MDETERM", Ptg.CLASS_VALUE);
+	}
+
+	/**
+	 * Documents an open issue with INDEX(PI(),1): Excel encodes PI() as 'array', and it is
+	 * not clear what rule justifies this. POI currently encodes it as 'value', which
+	 * Excel(2007) seems to tolerate. Changing the metadata for INDEX to have its first
+	 * parameter as 'array' class breaks other formulas involving INDEX, so it seems like a
+	 * special case needs to be made. Perhaps an important observation is that INDEX is one
+	 * of very few functions that returns 'reference' type.
+	 * This test has been added but disabled in order to document this issue.
+	 * NOTE(review): PI is asserted at token index 1 - confirm that index before enabling.
+	 */
+	public void DISABLED_testIndexPi1() {
+		Ptg[] tokens = FormulaParser.parse("INDEX(PI(),1)", null);
+		confirmFuncClass(tokens, 1, "PI", Ptg.CLASS_ARRAY); // fails as of POI 3.1
+		confirmFuncClass(tokens, 2, "INDEX", Ptg.CLASS_VALUE);
+	}
+
+	/** Regression test for bug 45041: neither ROW nor SUMIF may be encoded as 'value' class. */
+	public void testComplexIRR_bug45041() {
+		Ptg[] tokens = FormulaParser.parse(
+				"(1+IRR(SUMIF(A:A,ROW(INDIRECT(MIN(A:A)&\":\"&MAX(A:A))),B:B),0))^365-1", null);
+
+		FuncVarPtg row = (FuncVarPtg) tokens[10];
+		FuncVarPtg sumif = (FuncVarPtg) tokens[12];
+		assertEquals("ROW", row.getName());
+		assertEquals("SUMIF", sumif.getName());
+
+		if (row.getPtgClass() == Ptg.CLASS_VALUE || sumif.getPtgClass() == Ptg.CLASS_VALUE) {
+			throw new AssertionFailedError("Identified bug 45041");
+		}
+		confirmTokenClass(tokens, 1, Ptg.CLASS_REF);
+		confirmTokenClass(tokens, 2, Ptg.CLASS_REF);
+		confirmFuncClass(tokens, 3, "MIN", Ptg.CLASS_VALUE);
+		confirmTokenClass(tokens, 6, Ptg.CLASS_REF);
+		confirmFuncClass(tokens, 7, "MAX", Ptg.CLASS_VALUE);
+		confirmFuncClass(tokens, 9, "INDIRECT", Ptg.CLASS_REF);
+		confirmFuncClass(tokens, 10, "ROW", Ptg.CLASS_ARRAY);
+		confirmTokenClass(tokens, 11, Ptg.CLASS_REF);
+		confirmFuncClass(tokens, 12, "SUMIF", Ptg.CLASS_ARRAY);
+		confirmFuncClass(tokens, 14, "IRR", Ptg.CLASS_VALUE);
+	}
+
+	/** Checks the operand class of token i, then that the token is the named function. */
+	private void confirmFuncClass(Ptg[] ptgs, int i, String expectedFunctionName, byte operandClass) {
+		confirmTokenClass(ptgs, i, operandClass);
+		assertEquals(expectedFunctionName, ((AbstractFunctionPtg) ptgs[i]).getName());
+	}
+
+	/** Throws AssertionFailedError unless token i carries the given operand class. */
+	private void confirmTokenClass(Ptg[] ptgs, int i, byte operandClass) {
+		Ptg token = ptgs[i];
+		if (token.getPtgClass() == operandClass) {
+			return;
+		}
+		throw new AssertionFailedError("Wrong operand class for function ptg ("
+				+ token.toString() + "). Expected " + getOperandClassName(operandClass)
+				+ " but got " + getOperandClassName(token.getPtgClass()));
+	}
+
+	/** Maps an operand class constant to "R", "V" or "A"; any other value is rejected. */
+	private static String getOperandClassName(byte ptgClass) {
+		switch (ptgClass) {
+			case Ptg.CLASS_REF: return "R";
+			case Ptg.CLASS_VALUE: return "V";
+			case Ptg.CLASS_ARRAY: return "A";
+		}
+		throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
+	}
+}
diff --git a/src/testcases/org/apache/poi/hssf/model/TestRVA.java b/src/testcases/org/apache/poi/hssf/model/TestRVA.java
new file mode 100644
index 000000000..cb51c17bd
--- /dev/null
+++ b/src/testcases/org/apache/poi/hssf/model/TestRVA.java
@@ -0,0 +1,156 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.model;
+
+import junit.framework.AssertionFailedError;
+import junit.framework.TestCase;
+
+import org.apache.poi.hssf.HSSFTestDataSamples;
+import org.apache.poi.hssf.record.formula.AttrPtg;
+import org.apache.poi.hssf.record.formula.Ptg;
+import org.apache.poi.hssf.record.formula.ReferencePtg;
+import org.apache.poi.hssf.usermodel.FormulaExtractor;
+import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+
+/**
+ * Tests 'operand class' transformation performed by
+ * OperandClassTransformer by comparing its results with those
+ * directly produced by Excel (in a sample spreadsheet).
+ *
+ * @author Josh Micich
+ */
+public final class TestRVA extends TestCase {
+
+	private static final String NEW_LINE = System.getProperty("line.separator");
+
+	/** Checks the first-cell formula of each row of testRVA.xls, up to a missing row or blank cell. */
+	public void testFormulas() {
+		HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("testRVA.xls");
+		HSSFSheet sheet = wb.getSheetAt(0);
+		int countFailures = 0;
+		int countErrors = 0;
+		// problems are only counted here, so every bad row is reported before the test fails
+		for (int rowIx = 0; rowIx < 65535; rowIx++) {
+			HSSFRow row = sheet.getRow(rowIx);
+			if (row == null) {
+				break;
+			}
+			HSSFCell cell = row.getCell(0);
+			if (cell == null || cell.getCellType() == HSSFCell.CELL_TYPE_BLANK) {
+				break;
+			}
+			String formula = cell.getCellFormula();
+			try {
+				confirmCell(cell, formula);
+			} catch (AssertionFailedError e) {
+				System.err.println("Problem with row[" + rowIx + "] formula '" + formula + "'");
+				System.err.println(e.getMessage());
+				countFailures++;
+			} catch (RuntimeException e) {
+				System.err.println("Problem with row[" + rowIx + "] formula '" + formula + "'");
+				countErrors++;
+				e.printStackTrace();
+			}
+		}
+		if (countErrors + countFailures > 0) {
+			String msg = "One or more RVA tests failed: countFailures=" + countFailures
+					+ " countErrors=" + countErrors + ". See stderr for details.";
+			throw new AssertionFailedError(msg);
+		}
+	}
+
+	/** Compares the tokens stored in the cell with the tokens POI parses from the formula text. */
+	private void confirmCell(HSSFCell formulaCell, String formula) {
+		Ptg[] excelPtgs = FormulaExtractor.getPtgs(formulaCell);
+		Ptg[] poiPtgs = FormulaParser.parse(formula, null);
+		int nExcelTokens = excelPtgs.length;
+		int nPoiTokens = poiPtgs.length;
+		if (nExcelTokens != nPoiTokens) {
+			if (nExcelTokens == nPoiTokens + 1 && excelPtgs[0].getClass() == AttrPtg.class) {
+				// compensate for missing tAttrVolatile, which belongs in any formula
+				// involving OFFSET() et al. POI currently does not insert where required
+				Ptg[] temp = new Ptg[nExcelTokens];
+				temp[0] = excelPtgs[0];
+				System.arraycopy(poiPtgs, 0, temp, 1, nPoiTokens);
+				poiPtgs = temp;
+			} else {
+				throw new RuntimeException("Expected " + nExcelTokens + " tokens but got "
+						+ nPoiTokens);
+			}
+		}
+		boolean hasMismatch = false;
+		StringBuffer sb = new StringBuffer();
+		for (int i = 0; i < nExcelTokens; i++) {
+			Ptg poiPtg = poiPtgs[i];
+			Ptg excelPtg = excelPtgs[i];
+			if (!areTokenClassesSame(poiPtg, excelPtg)) {
+				hasMismatch = true;
+				sb.append(" mismatch token type[" + i + "] " + getShortClassName(excelPtg) + " "
+						+ getOperandClassName(excelPtg) + " - " + getShortClassName(poiPtg) + " "
+						+ getOperandClassName(poiPtg));
+				sb.append(NEW_LINE);
+				continue;
+			}
+			if (poiPtg.isBaseToken()) {
+				continue;
+			}
+			sb.append(" token[" + i + "] " + excelPtg.toString() + " "
+					+ getOperandClassName(excelPtg));
+			if (excelPtg.getPtgClass() != poiPtg.getPtgClass()) {
+				hasMismatch = true;
+				sb.append(" - was " + getOperandClassName(poiPtg));
+			}
+			sb.append(NEW_LINE);
+		}
+		if (hasMismatch) {
+			throw new AssertionFailedError(sb.toString());
+		}
+	}
+
+	/** Same Java class, or POI gave a plain ReferencePtg and Excel any ReferencePtg subtype. */
+	private boolean areTokenClassesSame(Ptg poiPtg, Ptg excelPtg) {
+		if (excelPtg.getClass() == poiPtg.getClass()) {
+			return true;
+		}
+		if (poiPtg.getClass() == ReferencePtg.class) {
+			// TODO - remove funny subclasses of ReferencePtg
+			return excelPtg instanceof ReferencePtg;
+		}
+		return false;
+	}
+
+	/** Returns the class name of the object without its package prefix. */
+	private String getShortClassName(Object o) {
+		String cn = o.getClass().getName();
+		return cn.substring(cn.lastIndexOf('.') + 1);
+	}
+
+	/** Maps the operand class of the token to "R", "V" or "A"; any other value is rejected. */
+	private static String getOperandClassName(Ptg ptg) {
+		byte ptgClass = ptg.getPtgClass();
+		switch (ptgClass) {
+			case Ptg.CLASS_REF: return "R";
+			case Ptg.CLASS_VALUE: return "V";
+			case Ptg.CLASS_ARRAY: return "A";
+		}
+		throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
+	}
+}
diff --git a/src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java b/src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java
new file mode 100644
index 000000000..d657647ea
--- /dev/null
+++ b/src/testcases/org/apache/poi/hssf/usermodel/FormulaExtractor.java
@@ -0,0 +1,49 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hssf.usermodel;
+
+import java.util.List;
+
+import org.apache.poi.hssf.record.CellValueRecordInterface;
+import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate;
+import org.apache.poi.hssf.record.formula.Ptg;
+
+/**
+ * Test utility class to get Ptg arrays out of formula cells
+ *
+ * @author Josh Micich
+ */
+public final class FormulaExtractor {
+
+	private FormulaExtractor() {
+		// no instances of this class
+	}
+
+	/**
+	 * Copies the parsed expression of the cell's formula record into a new Ptg array.
+	 * @throws IllegalArgumentException if the cell's value record is not a FormulaRecordAggregate
+	 */
+	public static Ptg[] getPtgs(HSSFCell cell) {
+		CellValueRecordInterface valueRecord = cell.getCellValueRecord();
+		if (!(valueRecord instanceof FormulaRecordAggregate)) {
+			throw new IllegalArgumentException("Not a formula cell");
+		}
+		List parsed = ((FormulaRecordAggregate) valueRecord).getFormulaRecord().getParsedExpression();
+		return (Ptg[]) parsed.toArray(new Ptg[parsed.size()]);
+	}
+}