/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.model;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Stack;
import java.util.regex.Pattern;
//import PTG's .. since we need everything, import *
import org.apache.poi.hssf.record.formula.*;
import org.apache.poi.hssf.record.formula.function.FunctionMetadata;
import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
* This class parses a formula string into a List of tokens in RPN order.
* Inspired by
* Lets Build a Compiler, by Jack Crenshaw
* BNF for the formula expression is :
*
* Primarily used by test cases when testing for specific parsing exceptions.
* For IF Formulas, additional PTGs are added to the tokens * @param name * @param numArgs * @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this function */ private ParseNode getFunction(String name, NamePtg namePtg, ParseNode[] args) { FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByName(name.toUpperCase()); int numArgs = args.length; if(fm == null) { if (namePtg == null) { throw new IllegalStateException("NamePtg must be supplied for external functions"); } // must be external function ParseNode[] allArgs = new ParseNode[numArgs+1]; allArgs[0] = new ParseNode(namePtg); System.arraycopy(args, 0, allArgs, 1, numArgs); return new ParseNode(new FuncVarPtg(name, (byte)(numArgs+1)), allArgs); } if (namePtg != null) { throw new IllegalStateException("NamePtg no applicable to internal functions"); } boolean isVarArgs = !fm.hasFixedArgsLength(); int funcIx = fm.getIndex(); validateNumArgs(args.length, fm); AbstractFunctionPtg retval; if(isVarArgs) { retval = new FuncVarPtg(name, (byte)numArgs); } else { retval = new FuncPtg(funcIx); } return new ParseNode(retval, args); } private void validateNumArgs(int numArgs, FunctionMetadata fm) { if(numArgs < fm.getMinParams()) { String msg = "Too few arguments to function '" + fm.getName() + "'. "; if(fm.hasFixedArgsLength()) { msg += "Expected " + fm.getMinParams(); } else { msg += "At least " + fm.getMinParams() + " were expected"; } msg += " but got " + numArgs + "."; throw new FormulaParseException(msg); } if(numArgs > fm.getMaxParams()) { String msg = "Too many arguments to function '" + fm.getName() + "'. 
"; if(fm.hasFixedArgsLength()) { msg += "Expected " + fm.getMaxParams(); } else { msg += "At most " + fm.getMaxParams() + " were expected"; } msg += " but got " + numArgs + "."; throw new FormulaParseException(msg); } } private static boolean isArgumentDelimiter(char ch) { return ch == ',' || ch == ')'; } /** get arguments to a function */ private ParseNode[] Arguments() { //average 2 args per function List temp = new ArrayList(2); SkipWhite(); if(look == ')') { return ParseNode.EMPTY_ARRAY; } boolean missedPrevArg = true; int numArgs = 0; while (true) { SkipWhite(); if (isArgumentDelimiter(look)) { if (missedPrevArg) { temp.add(new ParseNode(MissingArgPtg.instance)); numArgs++; } if (look == ')') { break; } Match(','); missedPrevArg = true; continue; } temp.add(comparisonExpression()); numArgs++; missedPrevArg = false; SkipWhite(); if (!isArgumentDelimiter(look)) { throw expected("',' or ')'"); } } ParseNode[] result = new ParseNode[temp.size()]; temp.toArray(result); return result; } /** Parse and Translate a Math Factor */ private ParseNode powerFactor() { ParseNode result = percentFactor(); while(true) { SkipWhite(); if(look != '^') { return result; } Match('^'); ParseNode other = percentFactor(); result = new ParseNode(PowerPtg.instance, result, other); } } private ParseNode percentFactor() { ParseNode result = parseSimpleFactor(); while(true) { SkipWhite(); if(look != '%') { return result; } Match('%'); result = new ParseNode(PercentPtg.instance, result); } } /** * factors (without ^ or % ) */ private ParseNode parseSimpleFactor() { SkipWhite(); switch(look) { case '#': return new ParseNode(parseErrorLiteral()); case '-': Match('-'); return new ParseNode(UnaryMinusPtg.instance, powerFactor()); case '+': Match('+'); return new ParseNode(UnaryPlusPtg.instance, powerFactor()); case '(': Match('('); ParseNode inside = comparisonExpression(); Match(')'); return new ParseNode(ParenthesisPtg.instance, inside); case '"': return new ParseNode(parseStringLiteral()); } 
if (IsAlpha(look) || look == '\''){ return parseFunctionOrIdentifier(); } // else - assume number return new ParseNode(parseNumber()); } private Ptg parseNumber() { String number2 = null; String exponent = null; String number1 = GetNum(); if (look == '.') { GetChar(); number2 = GetNum(); } if (look == 'E') { GetChar(); String sign = ""; if (look == '+') { GetChar(); } else if (look == '-') { GetChar(); sign = "-"; } String number = GetNum(); if (number == null) { throw expected("Integer"); } exponent = sign + number; } if (number1 == null && number2 == null) { throw expected("Integer"); } return getNumberPtgFromString(number1, number2, exponent); } private ErrPtg parseErrorLiteral() { Match('#'); String part1 = GetName().toUpperCase(); switch(part1.charAt(0)) { case 'V': if(part1.equals("VALUE")) { Match('!'); return ErrPtg.VALUE_INVALID; } throw expected("#VALUE!"); case 'R': if(part1.equals("REF")) { Match('!'); return ErrPtg.REF_INVALID; } throw expected("#REF!"); case 'D': if(part1.equals("DIV")) { Match('/'); Match('0'); Match('!'); return ErrPtg.DIV_ZERO; } throw expected("#DIV/0!"); case 'N': if(part1.equals("NAME")) { Match('?'); // only one that ends in '?' return ErrPtg.NAME_INVALID; } if(part1.equals("NUM")) { Match('!'); return ErrPtg.NUM_ERROR; } if(part1.equals("NULL")) { Match('!'); return ErrPtg.NULL_INTERSECTION; } if(part1.equals("N")) { Match('/'); if(look != 'A' && look != 'a') { throw expected("#N/A"); } Match(look); // Note - no '!' or '?' suffix return ErrPtg.N_A; } throw expected("#NAME?, #NUM!, #NULL! or #N/A"); } throw expected("#VALUE!, #REF!, #DIV/0!, #NAME?, #NUM!, #NULL! or #N/A"); } /** * Get a PTG for an integer from its string representation. 
* return Int or Number Ptg based on size of input */ private static Ptg getNumberPtgFromString(String number1, String number2, String exponent) { StringBuffer number = new StringBuffer(); if (number2 == null) { number.append(number1); if (exponent != null) { number.append('E'); number.append(exponent); } String numberStr = number.toString(); int intVal; try { intVal = Integer.parseInt(numberStr); } catch (NumberFormatException e) { return new NumberPtg(numberStr); } if (IntPtg.isInRange(intVal)) { return new IntPtg(intVal); } return new NumberPtg(numberStr); } if (number1 != null) { number.append(number1); } number.append('.'); number.append(number2); if (exponent != null) { number.append('E'); number.append(exponent); } return new NumberPtg(number.toString()); } private StringPtg parseStringLiteral() { Match('"'); StringBuffer token = new StringBuffer(); while (true) { if (look == '"') { GetChar(); if (look != '"') { break; } } token.append(look); GetChar(); } return new StringPtg(token.toString()); } /** Parse and Translate a Math Term */ private ParseNode Term() { ParseNode result = powerFactor(); while(true) { SkipWhite(); Ptg operator; switch(look) { case '*': Match('*'); operator = MultiplyPtg.instance; break; case '/': Match('/'); operator = DividePtg.instance; break; default: return result; // finished with Term } ParseNode other = powerFactor(); result = new ParseNode(operator, result, other); } } private ParseNode comparisonExpression() { ParseNode result = concatExpression(); while (true) { SkipWhite(); switch(look) { case '=': case '>': case '<': Ptg comparisonToken = getComparisonToken(); ParseNode other = concatExpression(); result = new ParseNode(comparisonToken, result, other); continue; } return result; // finished with predicate expression } } private Ptg getComparisonToken() { if(look == '=') { Match(look); return EqualPtg.instance; } boolean isGreater = look == '>'; Match(look); if(isGreater) { if(look == '=') { Match('='); return 
GreaterEqualPtg.instance; } return GreaterThanPtg.instance; } switch(look) { case '=': Match('='); return LessEqualPtg.instance; case '>': Match('>'); return NotEqualPtg.instance; } return LessThanPtg.instance; } private ParseNode concatExpression() { ParseNode result = additiveExpression(); while (true) { SkipWhite(); if(look != '&') { break; // finished with concat expression } Match('&'); ParseNode other = additiveExpression(); result = new ParseNode(ConcatPtg.instance, result, other); } return result; } /** Parse and Translate an Expression */ private ParseNode additiveExpression() { ParseNode result = Term(); while (true) { SkipWhite(); Ptg operator; switch(look) { case '+': Match('+'); operator = AddPtg.instance; break; case '-': Match('-'); operator = SubtractPtg.instance; break; default: return result; // finished with additive expression } ParseNode other = Term(); result = new ParseNode(operator, result, other); } } //{--------------------------------------------------------------} //{ Parse and Translate an Assignment Statement } /** procedure Assignment; var Name: string[8]; begin Name := GetName; Match('='); Expression; end; **/ /** * API call to execute the parsing of the formula * @deprecated use Ptg[] FormulaParser.parse(String, HSSFWorkbook) directly */ public void parse() { pointer=0; GetChar(); _rootNode = comparisonExpression(); if(pointer <= formulaLength) { String msg = "Unused input [" + formulaString.substring(pointer-1) + "] after attempting to parse the formula [" + formulaString + "]"; throw new FormulaParseException(msg); } } /********************************* * PARSER IMPLEMENTATION ENDS HERE * EXCEL SPECIFIC METHODS BELOW *******************************/ /** API call to retrive the array of Ptgs created as * a result of the parsing */ public Ptg[] getRPNPtg() { return getRPNPtg(FORMULA_TYPE_CELL); } public Ptg[] getRPNPtg(int formulaType) { OperandClassTransformer oct = new OperandClassTransformer(formulaType); // RVA is for 'operand 
class': 'reference', 'value', 'array' oct.transformFormula(_rootNode); return ParseNode.toTokenArray(_rootNode); } /** * Convenience method which takes in a list then passes it to the * other toFormulaString signature. * @param book workbook for 3D and named references * @param lptgs list of Ptg, can be null or empty * @return a human readable String */ public static String toFormulaString(HSSFWorkbook book, List lptgs) { String retval = null; if (lptgs == null || lptgs.size() == 0) return "#NAME"; Ptg[] ptgs = new Ptg[lptgs.size()]; ptgs = (Ptg[])lptgs.toArray(ptgs); retval = toFormulaString(book, ptgs); return retval; } /** * Convenience method which takes in a list then passes it to the * other toFormulaString signature. Works on the current * workbook for 3D and named references * @param lptgs list of Ptg, can be null or empty * @return a human readable String */ public String toFormulaString(List lptgs) { return toFormulaString(book, lptgs); } /** * Static method to convert an array of Ptgs in RPN order * to a human readable string format in infix mode. * @param book workbook for named and 3D references * @param ptgs array of Ptg, can be null or empty * @return a human readable String */ public static String toFormulaString(HSSFWorkbook book, Ptg[] ptgs) { if (ptgs == null || ptgs.length == 0) { // TODO - what is the justification for returning "#NAME" (which is not "#NAME?", btw) return "#NAME"; } Stack stack = new Stack(); for (int i=0 ; i < ptgs.length; i++) { Ptg ptg = ptgs[i]; // TODO - what about MemNoMemPtg? if(ptg instanceof MemAreaPtg || ptg instanceof MemFuncPtg || ptg instanceof MemErrPtg) { // marks the start of a list of area expressions which will be naturally combined // by their trailing operators (e.g. 
UnionPtg) // TODO - put comment and throw exception in toFormulaString() of these classes continue; } if (ptg instanceof ParenthesisPtg) { String contents = (String)stack.pop(); stack.push ("(" + contents + ")"); continue; } if (ptg instanceof AttrPtg) { AttrPtg attrPtg = ((AttrPtg) ptg); if (attrPtg.isOptimizedIf() || attrPtg.isOptimizedChoose() || attrPtg.isGoto()) { continue; } if (attrPtg.isSpace()) { // POI currently doesn't render spaces in formulas continue; // but if it ever did, care must be taken: // tAttrSpace comes *before* the operand it applies to, which may be consistent // with how the formula text appears but is against the RPN ordering assumed here } if (attrPtg.isSemiVolatile()) { // similar to tAttrSpace - RPN is violated continue; } if (attrPtg.isSum()) { String[] operands = getOperands(stack, attrPtg.getNumberOfOperands()); stack.push(attrPtg.toFormulaString(operands)); continue; } throw new RuntimeException("Unexpected tAttr: " + attrPtg.toString()); } if (! (ptg instanceof OperationPtg)) { stack.push(ptg.toFormulaString(book)); continue; } OperationPtg o = (OperationPtg) ptg; String[] operands = getOperands(stack, o.getNumberOfOperands()); stack.push(o.toFormulaString(operands)); } if(stack.isEmpty()) { // inspection of the code above reveals that every stack.pop() is followed by a // stack.push(). So this is either an internal error or impossible. throw new IllegalStateException("Stack underflow"); } String result = (String) stack.pop(); if(!stack.isEmpty()) { // Might be caused by some tokens like AttrPtg and Mem*Ptg, which really shouldn't // put anything on the stack throw new IllegalStateException("too much stuff left on the stack"); } return result; } private static String[] getOperands(Stack stack, int nOperands) { String[] operands = new String[nOperands]; for (int j = nOperands-1; j >= 0; j--) { // reverse iteration because args were pushed in-order if(stack.isEmpty()) { String msg = "Too few arguments supplied to operation. 
Expected (" + nOperands + ") operands but got (" + (nOperands - j - 1) + ")"; throw new IllegalStateException(msg); } operands[j] = (String) stack.pop(); } return operands; } /** * Static method to convert an array of Ptgs in RPN order * to a human readable string format in infix mode. Works * on the current workbook for named and 3D references. * @param ptgs array of Ptg, can be null or empty * @return a human readable String */ public String toFormulaString(Ptg[] ptgs) { return toFormulaString(book, ptgs); } }