2002-07-14 20:18:48 -04:00
/ * = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
2006-12-22 14:18:16 -05:00
Licensed to the Apache Software Foundation ( ASF ) under one or more
contributor license agreements . See the NOTICE file distributed with
this work for additional information regarding copyright ownership .
The ASF licenses this file to You under the Apache License , Version 2 . 0
( the " License " ) ; you may not use this file except in compliance with
the License . You may obtain a copy of the License at
2004-04-09 09:05:39 -04:00
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an " AS IS " BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = * /
2002-07-14 20:18:48 -04:00
package org.apache.poi.hssf.model ;
2002-10-08 20:05:55 -04:00
import java.util.ArrayList ;
2003-03-14 20:57:41 -05:00
import java.util.Iterator ;
2002-10-08 20:05:55 -04:00
import java.util.List ;
2008-03-04 11:53:32 -05:00
import java.util.Stack ;
2008-01-08 10:08:51 -05:00
import java.util.regex.Pattern ;
2002-07-14 20:18:48 -04:00
2003-05-04 14:22:09 -04:00
//import PTG's .. since we need everything, import *
import org.apache.poi.hssf.record.formula.* ;
2003-03-14 20:57:41 -05:00
2002-07-14 20:18:48 -04:00
/ * *
* This class parses a formula string into a List of tokens in RPN order .
2008-03-04 11:53:32 -05:00
* Inspired by
2002-07-14 20:18:48 -04:00
* Let's Build a Compiler, by Jack Crenshaw
* BNF for the formula expression is :
* < expression > : : = < term > [ < addop > < term > ] *
* < term > : : = < factor > [ < mulop > < factor > ] *
* < factor > : : = < number > | ( < expression > ) | < cellRef > | < function >
* < function > : : = < functionName > ( [ expression [ , expression ] * ] )
*
2005-07-08 11:19:53 -04:00
* @author Avik Sengupta < avik at apache dot org >
2002-07-14 20:18:48 -04:00
* @author Andrew C . oliver ( acoliver at apache dot org )
2002-07-14 22:06:22 -04:00
* @author Eric Ladner ( eladner at goldinc dot com )
2003-05-04 14:22:09 -04:00
* @author Cameron Riley ( criley at ekmail . com )
2004-01-01 02:51:34 -05:00
* @author Peter M . Murray ( pete at quantrix dot com )
2007-09-18 07:53:28 -04:00
* @author Pavel Krupets ( pkrupets at palmtreebusiness dot com )
2002-07-14 20:18:48 -04:00
* /
2008-03-04 11:53:32 -05:00
public final class FormulaParser {
2002-07-14 20:18:48 -04:00
2008-03-04 11:53:32 -05:00
/ * *
* Specific exception thrown when a supplied formula does not parse properly . < br / >
* Primarily used by test cases when testing for specific parsing exceptions . < / p >
*
* /
static final class FormulaParseException extends RuntimeException {
// This class was given package scope until it would become clear that it is useful to
// general client code.
public FormulaParseException ( String msg ) {
super ( msg ) ;
}
}
2002-07-14 20:18:48 -04:00
// Formula type codes accepted by getRPNPtg(int). They steer which operand class
// (reference / value / array) is assigned to the parsed tokens.
// Declared final so that client code cannot reassign them by accident.
public static final int FORMULA_TYPE_CELL = 0;
public static final int FORMULA_TYPE_SHARED = 1;
public static final int FORMULA_TYPE_ARRAY = 2;
// NOTE: the misspelt name is part of the public API and is kept for compatibility.
public static final int FORMULA_TYPE_CONDFOMRAT = 3;
public static final int FORMULA_TYPE_NAMEDRANGE = 4;
2008-03-04 11:53:32 -05:00
// Formula text handed to the constructor; never changes afterwards.
private final String formulaString ;
// Cached length of formulaString.
private final int formulaLength ;
// Read position: GetChar() loads the character at this index into 'look' and then advances it.
private int pointer ;
// Tokens emitted by the parse methods so far (a java.util.Stack used through the List interface).
private final List tokens = new Stack ( ) ;
2008-01-08 10:08:51 -05:00
/ * *
* Used for spotting if we have a cell reference ,
* or a named range
* /
private final static Pattern CELL_REFERENCE_PATTERN = Pattern . compile ( " (?:('?)[^: \\ \\ / \\ ? \\ * \\ [ \\ ]]+ \\ 1!)? \\ $?[A-Za-z]+ \\ $?[ \\ d]+ " ) ;
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
// Horizontal tab, treated as white space by IsWhite(); final because it is a constant.
private static final char TAB = '\t';
2008-03-04 11:53:32 -05:00
/ * *
* Lookahead Character .
* gets value '\0' when the input string is exhausted
* /
private char look ;
private Workbook book ;
/**
 * Creates a formula parser for the given string, to be parsed against the supplied workbook.
 * Afterwards call {@link #parse()} to run the parser and then {@link #getRPNPtg()} to
 * retrieve the resulting tokens in RPN order.
 * This class is recommended only for single threaded use.
 * <p>
 * If you only have a usermodel.HSSFWorkbook, and not a model.Workbook, then use the
 * convenience method on usermodel.HSSFFormulaEvaluator.
 * </p>
 *
 * @param formula the formula text to parse
 * @param book the workbook the formula is parsed against
 */
public FormulaParser(String formula, Workbook book) {
    this.book = book;
    this.formulaString = formula;
    this.formulaLength = formula.length();
    this.pointer = 0;
}
public static Ptg [ ] parse ( String formula , Workbook book ) {
FormulaParser fp = new FormulaParser ( formula , book ) ;
fp . parse ( ) ;
return fp . getRPNPtg ( ) ;
2002-07-14 20:18:48 -04:00
}
/** Read New Character From Input Stream */
private void GetChar ( ) {
2002-07-14 22:06:22 -04:00
// Check to see if we've walked off the end of the string.
2008-03-04 11:53:32 -05:00
if ( pointer > formulaLength ) {
throw new RuntimeException ( " too far " ) ;
}
if ( pointer < formulaLength ) {
look = formulaString . charAt ( pointer ) ;
} else {
// Just return if so and reset 'look' to something to keep
// SkipWhitespace from spinning
2002-10-08 20:05:55 -04:00
look = ( char ) 0 ;
2008-03-04 11:53:32 -05:00
}
pointer + + ;
2003-05-04 14:22:09 -04:00
//System.out.println("Got char: "+ look);
2002-07-14 20:18:48 -04:00
}
/** Report What Was Expected */
2008-03-04 11:53:32 -05:00
private RuntimeException expected ( String s ) {
return new FormulaParseException ( s + " Expected " ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/** Recognize an Alpha Character */
private boolean IsAlpha ( char c ) {
2005-05-31 15:10:47 -04:00
return Character . isLetter ( c ) | | c = = '$' | | c = = '_' ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/** Recognizes a decimal digit, as defined by Character.isDigit(char). */
private boolean IsDigit(char c) {
    boolean digit = Character.isDigit(c);
    return digit;
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/** Recognize an Alphanumeric */
private boolean IsAlNum ( char c ) {
return ( IsAlpha ( c ) | | IsDigit ( c ) ) ;
}
/** Recognize White Space */
private boolean IsWhite ( char c ) {
return ( c = = ' ' | | c = = TAB ) ;
}
/** Skip Over Leading White Space */
private void SkipWhite ( ) {
2002-10-08 20:05:55 -04:00
while ( IsWhite ( look ) ) {
2002-07-14 20:18:48 -04:00
GetChar ( ) ;
}
}
2008-03-04 11:53:32 -05:00
/ * *
* Consumes the next input character if it is equal to the one specified otherwise throws an
* unchecked exception . This method does < b > not < / b > consume whitespace ( before or after the
* matched character ) .
* /
2002-07-14 20:18:48 -04:00
private void Match ( char x ) {
2002-10-08 20:05:55 -04:00
if ( look ! = x ) {
2008-03-04 11:53:32 -05:00
throw expected ( " ' " + x + " ' " ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
GetChar ( ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/** Get an Identifier */
private String GetName ( ) {
StringBuffer Token = new StringBuffer ( ) ;
2004-01-01 02:51:34 -05:00
if ( ! IsAlpha ( look ) & & look ! = '\'' ) {
2008-03-04 11:53:32 -05:00
throw expected ( " Name " ) ;
2002-07-14 20:18:48 -04:00
}
2004-01-01 02:51:34 -05:00
if ( look = = '\'' )
{
2008-03-04 11:53:32 -05:00
Match ( '\'' ) ;
boolean done = look = = '\'' ;
while ( ! done )
{
Token . append ( look ) ;
GetChar ( ) ;
if ( look = = '\'' )
{
Match ( '\'' ) ;
done = look ! = '\'' ;
}
}
2002-07-14 20:18:48 -04:00
}
2004-01-01 02:51:34 -05:00
else
{
2008-03-04 11:53:32 -05:00
while ( IsAlNum ( look ) ) {
Token . append ( look ) ;
GetChar ( ) ;
}
2002-08-03 14:16:54 -04:00
}
return Token . toString ( ) ;
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/** Get a Number */
private String GetNum ( ) {
2006-08-09 16:03:36 -04:00
StringBuffer value = new StringBuffer ( ) ;
2008-03-04 11:53:32 -05:00
2007-09-18 07:53:28 -04:00
while ( IsDigit ( this . look ) ) {
value . append ( this . look ) ;
2002-07-14 20:18:48 -04:00
GetChar ( ) ;
}
2007-09-18 07:53:28 -04:00
return value . length ( ) = = 0 ? null : value . toString ( ) ;
2002-07-14 20:18:48 -04:00
}
/** Parse and Translate a String Identifier */
2008-03-04 11:53:32 -05:00
private Ptg parseIdent ( ) {
2002-07-14 20:18:48 -04:00
String name ;
name = GetName ( ) ;
2002-10-08 20:05:55 -04:00
if ( look = = '(' ) {
2002-07-14 20:18:48 -04:00
//This is a function
2008-03-04 11:53:32 -05:00
return function ( name ) ;
}
if ( look = = ':' | | look = = '.' ) { // this is a AreaReference
2007-09-18 07:53:28 -04:00
GetChar ( ) ;
2008-03-04 11:53:32 -05:00
2007-09-18 07:53:28 -04:00
while ( look = = '.' ) { // formulas can have . or .. or ... instead of :
GetChar ( ) ;
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
String first = name ;
String second = GetName ( ) ;
2008-03-04 11:53:32 -05:00
return new AreaPtg ( first + " : " + second ) ;
}
if ( look = = '!' ) {
2002-07-14 20:18:48 -04:00
Match ( '!' ) ;
String sheetName = name ;
String first = GetName ( ) ;
short externIdx = book . checkExternSheet ( book . getSheetIndex ( sheetName ) ) ;
2002-10-08 20:05:55 -04:00
if ( look = = ':' ) {
2002-07-14 20:18:48 -04:00
Match ( ':' ) ;
String second = GetName ( ) ;
2006-08-28 08:18:10 -04:00
if ( look = = '!' ) {
2008-03-04 11:53:32 -05:00
//The sheet name was included in both of the areas. Only really
//need it once
Match ( '!' ) ;
String third = GetName ( ) ;
if ( ! sheetName . equals ( second ) )
throw new RuntimeException ( " Unhandled double sheet reference. " ) ;
return new Area3DPtg ( first + " : " + third , externIdx ) ;
2006-08-28 08:18:10 -04:00
}
2008-03-04 11:53:32 -05:00
return new Area3DPtg ( first + " : " + second , externIdx ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
return new Ref3DPtg ( first , externIdx ) ;
}
if ( name . equalsIgnoreCase ( " TRUE " ) | | name . equalsIgnoreCase ( " FALSE " ) ) {
return new BoolPtg ( name . toUpperCase ( ) ) ;
}
// This can be either a cell ref or a named range
// Try to spot which it is
boolean cellRef = CELL_REFERENCE_PATTERN . matcher ( name ) . matches ( ) ;
if ( cellRef ) {
return new ReferencePtg ( name ) ;
}
for ( int i = 0 ; i < book . getNumNames ( ) ; i + + ) {
// named range name matching is case insensitive
if ( book . getNameRecord ( i ) . getNameText ( ) . equalsIgnoreCase ( name ) ) {
return new NamePtg ( name , book ) ;
2002-07-14 20:18:48 -04:00
}
}
2008-03-04 11:53:32 -05:00
throw new FormulaParseException ( " Found reference to named range \" "
+ name + " \" , but that named range wasn't defined! " ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
2003-03-14 20:57:41 -05:00
/ * *
* Adds a pointer to the last token to the latest function argument list .
* @param obj
* /
2008-03-04 11:53:32 -05:00
private void addArgumentPointer ( List argumentPointers ) {
argumentPointers . add ( tokens . get ( tokens . size ( ) - 1 ) ) ;
2003-03-14 20:57:41 -05:00
}
2008-03-04 11:53:32 -05:00
/ * *
* Note - Excel function names are ' case aware but not case sensitive ' . This method may end
* up creating a defined name record in the workbook if the specified name is not an internal
* Excel function , and has not been encountered before .
*
* @param name case preserved function name ( as it was entered / appeared in the formula ) .
* /
private Ptg function ( String name ) {
int numArgs = 0 ;
// Note regarding parameter -
if ( ! AbstractFunctionPtg . isInternalFunctionName ( name ) ) {
// external functions get a Name token which points to a defined name record
NamePtg nameToken = new NamePtg ( name , this . book ) ;
// in the token tree, the name is more or less the first argument
numArgs + + ;
tokens . add ( nameToken ) ;
}
//average 2 args per function
List argumentPointers = new ArrayList ( 2 ) ;
2002-07-14 20:18:48 -04:00
Match ( '(' ) ;
2008-03-04 11:53:32 -05:00
numArgs + = Arguments ( argumentPointers ) ;
2002-07-14 20:18:48 -04:00
Match ( ')' ) ;
2003-08-31 02:16:57 -04:00
2008-03-04 11:53:32 -05:00
return getFunction ( name , numArgs , argumentPointers ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
2003-03-14 20:57:41 -05:00
/ * *
* Adds the size of all the ptgs after the provided index ( inclusive ) .
* < p >
* Initially used to count a goto
* @param index
* @return int
* /
private int getPtgSize ( int index ) {
2008-03-04 11:53:32 -05:00
int count = 0 ;
Iterator ptgIterator = tokens . listIterator ( index ) ;
while ( ptgIterator . hasNext ( ) ) {
Ptg ptg = ( Ptg ) ptgIterator . next ( ) ;
count + = ptg . getSize ( ) ;
}
return count ;
2003-03-14 20:57:41 -05:00
}
2008-03-04 11:53:32 -05:00
2003-03-14 20:57:41 -05:00
private int getPtgSize ( int start , int end ) {
2003-05-04 14:22:09 -04:00
int count = 0 ;
int index = start ;
Iterator ptgIterator = tokens . listIterator ( index ) ;
while ( ptgIterator . hasNext ( ) & & index < = end ) {
Ptg ptg = ( Ptg ) ptgIterator . next ( ) ;
count + = ptg . getSize ( ) ;
index + + ;
}
2008-03-04 11:53:32 -05:00
2003-05-04 14:22:09 -04:00
return count ;
2003-03-14 20:57:41 -05:00
}
/ * *
* Generates the variable function ptg for the formula .
* < p >
2008-03-04 11:53:32 -05:00
* For IF Formulas , additional PTGs are added to the tokens
2003-03-14 20:57:41 -05:00
* @param name
* @param numArgs
* @return Ptg a null is returned if we ' re in an IF formula , it needs extreme manipulation and is handled in this function
* /
2008-03-04 11:53:32 -05:00
private AbstractFunctionPtg getFunction ( String name , int numArgs , List argumentPointers ) {
AbstractFunctionPtg retval = new FuncVarPtg ( name , ( byte ) numArgs ) ;
if ( ! name . equals ( AbstractFunctionPtg . FUNCTION_NAME_IF ) ) {
// early return for everything else besides IF()
return retval ;
2002-09-02 17:16:29 -04:00
}
2008-03-04 11:53:32 -05:00
AttrPtg ifPtg = new AttrPtg ( ) ;
ifPtg . setData ( ( short ) 7 ) ; //mirroring excel output
ifPtg . setOptimizedIf ( true ) ;
if ( argumentPointers . size ( ) ! = 2 & & argumentPointers . size ( ) ! = 3 ) {
throw new IllegalArgumentException ( " [ " + argumentPointers . size ( ) + " ] Arguments Found - An IF formula requires 2 or 3 arguments. IF(CONDITION, TRUE_VALUE, FALSE_VALUE [OPTIONAL] " ) ;
}
//Biffview of an IF formula record indicates the attr ptg goes after the condition ptgs and are
//tracked in the argument pointers
//The beginning first argument pointer is the last ptg of the condition
int ifIndex = tokens . indexOf ( argumentPointers . get ( 0 ) ) + 1 ;
tokens . add ( ifIndex , ifPtg ) ;
//we now need a goto ptgAttr to skip to the end of the formula after a true condition
//the true condition is should be inserted after the last ptg in the first argument
int gotoIndex = tokens . indexOf ( argumentPointers . get ( 1 ) ) + 1 ;
AttrPtg goto1Ptg = new AttrPtg ( ) ;
goto1Ptg . setGoto ( true ) ;
tokens . add ( gotoIndex , goto1Ptg ) ;
if ( numArgs > 2 ) { //only add false jump if there is a false condition
//second goto to skip past the function ptg
AttrPtg goto2Ptg = new AttrPtg ( ) ;
goto2Ptg . setGoto ( true ) ;
goto2Ptg . setData ( ( short ) ( retval . getSize ( ) - 1 ) ) ;
//Page 472 of the Microsoft Excel Developer's kit states that:
//The b(or w) field specifies the number byes (or words to skip, minus 1
tokens . add ( goto2Ptg ) ; //this goes after all the arguments are defined
}
//data portion of the if ptg points to the false subexpression (Page 472 of MS Excel Developer's kit)
//count the number of bytes after the ifPtg to the False Subexpression
//doesn't specify -1 in the documentation
ifPtg . setData ( ( short ) ( getPtgSize ( ifIndex + 1 , gotoIndex ) ) ) ;
//count all the additional (goto) ptgs but dont count itself
int ptgCount = this . getPtgSize ( gotoIndex ) - goto1Ptg . getSize ( ) + retval . getSize ( ) ;
if ( ptgCount > Short . MAX_VALUE ) {
throw new RuntimeException ( " Ptg Size exceeds short when being specified for a goto ptg in an if " ) ;
}
goto1Ptg . setData ( ( short ) ( ptgCount - 1 ) ) ;
2003-05-04 14:22:09 -04:00
return retval ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
private static boolean isArgumentDelimiter ( char ch ) {
return ch = = ',' | | ch = = ')' ;
}
2002-07-14 20:18:48 -04:00
/** get arguments to a function */
2008-03-04 11:53:32 -05:00
private int Arguments ( List argumentPointers ) {
SkipWhite ( ) ;
if ( look = = ')' ) {
return 0 ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
boolean missedPrevArg = true ;
int numArgs = 0 ;
while ( true ) {
SkipWhite ( ) ;
if ( isArgumentDelimiter ( look ) ) {
if ( missedPrevArg ) {
tokens . add ( new MissingArgPtg ( ) ) ;
addArgumentPointer ( argumentPointers ) ;
numArgs + + ;
}
if ( look = = ')' ) {
break ;
}
Match ( ',' ) ;
missedPrevArg = true ;
continue ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
comparisonExpression ( ) ;
addArgumentPointer ( argumentPointers ) ;
2002-07-14 20:18:48 -04:00
numArgs + + ;
2008-03-04 11:53:32 -05:00
missedPrevArg = false ;
2002-07-14 20:18:48 -04:00
}
return numArgs ;
}
/** Parse and Translate a Math Factor */
2008-03-04 11:53:32 -05:00
private void powerFactor ( ) {
percentFactor ( ) ;
while ( true ) {
SkipWhite ( ) ;
if ( look ! = '^' ) {
return ;
}
Match ( '^' ) ;
percentFactor ( ) ;
tokens . add ( new PowerPtg ( ) ) ;
2005-05-18 14:58:57 -04:00
}
2008-03-04 11:53:32 -05:00
}
private void percentFactor ( ) {
tokens . add ( parseSimpleFactor ( ) ) ;
while ( true ) {
SkipWhite ( ) ;
if ( look ! = '%' ) {
return ;
2006-08-09 16:03:36 -04:00
}
2008-03-04 11:53:32 -05:00
Match ( '%' ) ;
tokens . add ( new PercentPtg ( ) ) ;
}
}
/ * *
* factors ( without ^ or % )
* /
private Ptg parseSimpleFactor ( ) {
SkipWhite ( ) ;
switch ( look ) {
case '#' :
return parseErrorLiteral ( ) ;
case '-' :
Match ( '-' ) ;
powerFactor ( ) ;
return new UnaryMinusPtg ( ) ;
case '+' :
Match ( '+' ) ;
powerFactor ( ) ;
return new UnaryPlusPtg ( ) ;
case '(' :
Match ( '(' ) ;
comparisonExpression ( ) ;
Match ( ')' ) ;
return new ParenthesisPtg ( ) ;
case '"' :
return parseStringLiteral ( ) ;
case ',' :
case ')' :
return new MissingArgPtg ( ) ; // TODO - not quite the right place to recognise a missing arg
}
if ( IsAlpha ( look ) | | look = = '\'' ) {
return parseIdent ( ) ;
}
// else - assume number
return parseNumber ( ) ;
}
private Ptg parseNumber ( ) {
String number2 = null ;
String exponent = null ;
String number1 = GetNum ( ) ;
if ( look = = '.' ) {
GetChar ( ) ;
number2 = GetNum ( ) ;
}
if ( look = = 'E' ) {
GetChar ( ) ;
String sign = " " ;
if ( look = = '+' ) {
2007-09-18 07:53:28 -04:00
GetChar ( ) ;
2008-03-04 11:53:32 -05:00
} else if ( look = = '-' ) {
GetChar ( ) ;
sign = " - " ;
2006-08-09 16:03:36 -04:00
}
2008-03-04 11:53:32 -05:00
String number = GetNum ( ) ;
if ( number = = null ) {
throw expected ( " Integer " ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
exponent = sign + number ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
if ( number1 = = null & & number2 = = null ) {
throw expected ( " Integer " ) ;
}
return getNumberPtgFromString ( number1 , number2 , exponent ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
private ErrPtg parseErrorLiteral ( ) {
Match ( '#' ) ;
String part1 = GetName ( ) . toUpperCase ( ) ;
switch ( part1 . charAt ( 0 ) ) {
case 'V' :
if ( part1 . equals ( " VALUE " ) ) {
Match ( '!' ) ;
return ErrPtg . VALUE_INVALID ;
}
throw expected ( " #VALUE! " ) ;
case 'R' :
if ( part1 . equals ( " REF " ) ) {
Match ( '!' ) ;
return ErrPtg . REF_INVALID ;
}
throw expected ( " #REF! " ) ;
case 'D' :
if ( part1 . equals ( " DIV " ) ) {
Match ( '/' ) ;
Match ( '0' ) ;
Match ( '!' ) ;
return ErrPtg . DIV_ZERO ;
}
throw expected ( " #DIV/0! " ) ;
case 'N' :
if ( part1 . equals ( " NAME " ) ) {
Match ( '?' ) ; // only one that ends in '?'
return ErrPtg . NAME_INVALID ;
}
if ( part1 . equals ( " NUM " ) ) {
Match ( '!' ) ;
return ErrPtg . NUM_ERROR ;
}
if ( part1 . equals ( " NULL " ) ) {
Match ( '!' ) ;
return ErrPtg . NULL_INTERSECTION ;
}
if ( part1 . equals ( " N " ) ) {
Match ( '/' ) ;
if ( look ! = 'A' & & look ! = 'a' ) {
throw expected ( " #N/A " ) ;
}
Match ( look ) ;
// Note - no '!' or '?' suffix
return ErrPtg . N_A ;
}
throw expected ( " #NAME?, #NUM!, #NULL! or #N/A " ) ;
}
throw expected ( " #VALUE!, #REF!, #DIV/0!, #NAME?, #NUM!, #NULL! or #N/A " ) ;
}
/ * *
* Get a PTG for an integer from its string representation .
* return Int or Number Ptg based on size of input
* /
private static Ptg getNumberPtgFromString ( String number1 , String number2 , String exponent ) {
2007-09-18 07:53:28 -04:00
StringBuffer number = new StringBuffer ( ) ;
2008-03-04 11:53:32 -05:00
if ( number2 = = null ) {
number . append ( number1 ) ;
if ( exponent ! = null ) {
number . append ( 'E' ) ;
number . append ( exponent ) ;
}
2007-09-18 07:53:28 -04:00
String numberStr = number . toString ( ) ;
2008-03-04 11:53:32 -05:00
int intVal ;
2007-09-18 07:53:28 -04:00
try {
2008-03-04 11:53:32 -05:00
intVal = Integer . parseInt ( numberStr ) ;
2007-09-18 07:53:28 -04:00
} catch ( NumberFormatException e ) {
return new NumberPtg ( numberStr ) ;
}
2008-03-04 11:53:32 -05:00
if ( IntPtg . isInRange ( intVal ) ) {
return new IntPtg ( intVal ) ;
2007-09-18 07:53:28 -04:00
}
2008-03-04 11:53:32 -05:00
return new NumberPtg ( numberStr ) ;
}
if ( number1 ! = null ) {
number . append ( number1 ) ;
}
2002-07-14 20:18:48 -04:00
2008-03-04 11:53:32 -05:00
number . append ( '.' ) ;
number . append ( number2 ) ;
if ( exponent ! = null ) {
number . append ( 'E' ) ;
number . append ( exponent ) ;
}
return new NumberPtg ( number . toString ( ) ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
private StringPtg parseStringLiteral ( )
{
Match ( '"' ) ;
2003-05-06 23:40:52 -04:00
2008-03-04 11:53:32 -05:00
StringBuffer token = new StringBuffer ( ) ;
while ( true ) {
if ( look = = '"' ) {
GetChar ( ) ;
if ( look ! = '"' ) {
break ;
}
}
token . append ( look ) ;
GetChar ( ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
return new StringPtg ( token . toString ( ) ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
/** Parse and Translate a Math Term */
private void Term ( ) {
powerFactor ( ) ;
while ( true ) {
SkipWhite ( ) ;
switch ( look ) {
case '*' :
Match ( '*' ) ;
powerFactor ( ) ;
tokens . add ( new MultiplyPtg ( ) ) ;
continue ;
case '/' :
Match ( '/' ) ;
powerFactor ( ) ;
tokens . add ( new DividePtg ( ) ) ;
continue ;
}
return ; // finished with Term
}
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
private void comparisonExpression ( ) {
concatExpression ( ) ;
while ( true ) {
SkipWhite ( ) ;
switch ( look ) {
case '=' :
case '>' :
case '<' :
Ptg comparisonToken = getComparisonToken ( ) ;
concatExpression ( ) ;
tokens . add ( comparisonToken ) ;
continue ;
}
return ; // finished with predicate expression
}
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
private Ptg getComparisonToken ( ) {
if ( look = = '=' ) {
Match ( look ) ;
return new EqualPtg ( ) ;
}
boolean isGreater = look = = '>' ;
Match ( look ) ;
if ( isGreater ) {
if ( look = = '=' ) {
Match ( '=' ) ;
return new GreaterEqualPtg ( ) ;
}
return new GreaterThanPtg ( ) ;
}
switch ( look ) {
case '=' :
Match ( '=' ) ;
return new LessEqualPtg ( ) ;
case '>' :
Match ( '>' ) ;
return new NotEqualPtg ( ) ;
}
return new LessThanPtg ( ) ;
2002-09-02 17:16:29 -04:00
}
2002-07-14 20:18:48 -04:00
2003-05-04 14:22:09 -04:00
2008-03-04 11:53:32 -05:00
private void concatExpression ( ) {
additiveExpression ( ) ;
while ( true ) {
SkipWhite ( ) ;
if ( look ! = '&' ) {
break ; // finished with concat expression
}
Match ( '&' ) ;
additiveExpression ( ) ;
tokens . add ( new ConcatPtg ( ) ) ;
}
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/** Parse and Translate an Expression */
2008-03-04 11:53:32 -05:00
private void additiveExpression ( ) {
2004-01-01 02:51:34 -05:00
Term ( ) ;
2008-03-04 11:53:32 -05:00
while ( true ) {
SkipWhite ( ) ;
switch ( look ) {
case '+' :
Match ( '+' ) ;
Term ( ) ;
tokens . add ( new AddPtg ( ) ) ;
continue ;
case '-' :
Match ( '-' ) ;
Term ( ) ;
tokens . add ( new SubtractPtg ( ) ) ;
continue ;
}
return ; // finished with additive expression
2002-07-14 20:18:48 -04:00
}
2003-05-04 14:22:09 -04:00
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
//{--------------------------------------------------------------}
//{ Parse and Translate an Assignment Statement }
/ * *
procedure Assignment ;
var Name : string [ 8 ] ;
begin
Name : = GetName ;
Match ( '=' ) ;
Expression ;
end ;
* * /
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/ * * API call to execute the parsing of the formula
*
* /
public void parse ( ) {
2008-03-04 11:53:32 -05:00
pointer = 0 ;
GetChar ( ) ;
comparisonExpression ( ) ;
if ( pointer < = formulaLength ) {
String msg = " Unused input [ " + formulaString . substring ( pointer - 1 )
+ " ] after attempting to parse the formula [ " + formulaString + " ] " ;
throw new FormulaParseException ( msg ) ;
2002-07-14 20:18:48 -04:00
}
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* PARSER IMPLEMENTATION ENDS HERE
* EXCEL SPECIFIC METHODS BELOW
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * /
2008-03-04 11:53:32 -05:00
/ * * API call to retrive the array of Ptgs created as
2002-07-14 20:18:48 -04:00
* a result of the parsing
* /
public Ptg [ ] getRPNPtg ( ) {
return getRPNPtg ( FORMULA_TYPE_CELL ) ;
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/**
 * Retrieves the array of Ptgs created by the parsing, after assigning operand classes
 * to the tokens according to the given formula type.
 *
 * @param formulaType one of the FORMULA_TYPE_* constants
 * @return the parsed tokens
 */
public Ptg[] getRPNPtg(int formulaType) {
    Node root = createTree();
    setRootLevelRVA(root, formulaType);
    setParameterRVA(root, formulaType);

    Ptg[] emptyTemplate = new Ptg[0];
    return (Ptg[]) tokens.toArray(emptyTemplate);
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
private void setRootLevelRVA ( Node n , int formulaType ) {
//Pg 16, excelfileformat.pdf @ openoffice.org
2008-03-04 11:53:32 -05:00
Ptg p = n . getValue ( ) ;
2004-01-01 02:51:34 -05:00
if ( formulaType = = FormulaParser . FORMULA_TYPE_NAMEDRANGE ) {
2002-07-14 20:18:48 -04:00
if ( p . getDefaultOperandClass ( ) = = Ptg . CLASS_REF ) {
setClass ( n , Ptg . CLASS_REF ) ;
} else {
setClass ( n , Ptg . CLASS_ARRAY ) ;
}
} else {
setClass ( n , Ptg . CLASS_VALUE ) ;
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
private void setParameterRVA ( Node n , int formulaType ) {
2004-04-29 03:16:36 -04:00
Ptg p = n . getValue ( ) ;
int numOperands = n . getNumChildren ( ) ;
2002-07-14 20:18:48 -04:00
if ( p instanceof AbstractFunctionPtg ) {
2004-04-29 03:16:36 -04:00
for ( int i = 0 ; i < numOperands ; i + + ) {
2002-07-14 20:18:48 -04:00
setParameterRVA ( n . getChild ( i ) , ( ( AbstractFunctionPtg ) p ) . getParameterClass ( i ) , formulaType ) ;
2004-04-29 03:16:36 -04:00
// if (n.getChild(i).getValue() instanceof AbstractFunctionPtg) {
// setParameterRVA(n.getChild(i),formulaType);
// }
setParameterRVA ( n . getChild ( i ) , formulaType ) ;
}
2002-07-14 20:18:48 -04:00
} else {
2004-04-29 03:16:36 -04:00
for ( int i = 0 ; i < numOperands ; i + + ) {
2002-07-14 20:18:48 -04:00
setParameterRVA ( n . getChild ( i ) , formulaType ) ;
}
2008-03-04 11:53:32 -05:00
}
2002-07-14 20:18:48 -04:00
}
private void setParameterRVA ( Node n , int expectedClass , int formulaType ) {
2008-03-04 11:53:32 -05:00
Ptg p = n . getValue ( ) ;
if ( expectedClass = = Ptg . CLASS_REF ) { //pg 15, table 1
2002-07-14 20:18:48 -04:00
if ( p . getDefaultOperandClass ( ) = = Ptg . CLASS_REF ) {
setClass ( n , Ptg . CLASS_REF ) ;
}
if ( p . getDefaultOperandClass ( ) = = Ptg . CLASS_VALUE ) {
if ( formulaType = = FORMULA_TYPE_CELL | | formulaType = = FORMULA_TYPE_SHARED ) {
setClass ( n , Ptg . CLASS_VALUE ) ;
} else {
setClass ( n , Ptg . CLASS_ARRAY ) ;
}
}
if ( p . getDefaultOperandClass ( ) = = Ptg . CLASS_ARRAY ) {
setClass ( n , Ptg . CLASS_ARRAY ) ;
}
} else if ( expectedClass = = Ptg . CLASS_VALUE ) { //pg 15, table 2
if ( formulaType = = FORMULA_TYPE_NAMEDRANGE ) {
setClass ( n , Ptg . CLASS_ARRAY ) ;
} else {
setClass ( n , Ptg . CLASS_VALUE ) ;
}
2008-03-04 11:53:32 -05:00
} else { //Array class, pg 16.
2002-07-14 20:18:48 -04:00
if ( p . getDefaultOperandClass ( ) = = Ptg . CLASS_VALUE & &
( formulaType = = FORMULA_TYPE_CELL | | formulaType = = FORMULA_TYPE_SHARED ) ) {
setClass ( n , Ptg . CLASS_VALUE ) ;
} else {
setClass ( n , Ptg . CLASS_ARRAY ) ;
}
}
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
private void setClass ( Node n , byte theClass ) {
2008-03-04 11:53:32 -05:00
Ptg p = n . getValue ( ) ;
2002-07-14 20:18:48 -04:00
if ( p instanceof AbstractFunctionPtg | | ! ( p instanceof OperationPtg ) ) {
p . setClass ( theClass ) ;
} else {
for ( int i = 0 ; i < n . getNumChildren ( ) ; i + + ) {
setClass ( n . getChild ( i ) , theClass ) ;
}
}
}
/ * *
2007-10-15 09:48:51 -04:00
* Convience method which takes in a list then passes it to the
2008-03-04 11:53:32 -05:00
* other toFormulaString signature .
2003-08-31 02:16:57 -04:00
* @param book workbook for 3D and named references
* @param lptgs list of Ptg , can be null or empty
* @return a human readable String
2002-07-14 20:18:48 -04:00
* /
2003-08-31 02:16:57 -04:00
public static String toFormulaString ( Workbook book , List lptgs ) {
2002-07-14 20:18:48 -04:00
String retval = null ;
if ( lptgs = = null | | lptgs . size ( ) = = 0 ) return " #NAME " ;
Ptg [ ] ptgs = new Ptg [ lptgs . size ( ) ] ;
ptgs = ( Ptg [ ] ) lptgs . toArray ( ptgs ) ;
2003-08-31 02:16:57 -04:00
retval = toFormulaString ( book , ptgs ) ;
2002-07-14 20:18:48 -04:00
return retval ;
}
2007-10-15 09:48:51 -04:00
/ * *
* Convience method which takes in a list then passes it to the
* other toFormulaString signature . Works on the current
* workbook for 3D and named references
* @param lptgs list of Ptg , can be null or empty
* @return a human readable String
* /
public String toFormulaString ( List lptgs ) {
2008-03-04 11:53:32 -05:00
return toFormulaString ( book , lptgs ) ;
2007-10-15 09:48:51 -04:00
}
2008-03-04 11:53:32 -05:00
2003-08-31 02:16:57 -04:00
/ * *
* Static method to convert an array of Ptgs in RPN order
* to a human readable string format in infix mode .
* @param book workbook for named and 3D references
* @param ptgs array of Ptg , can be null or empty
* @return a human readable String
2002-07-14 20:18:48 -04:00
* /
2003-08-31 02:16:57 -04:00
public static String toFormulaString ( Workbook book , Ptg [ ] ptgs ) {
2008-03-04 11:53:32 -05:00
if ( ptgs = = null | | ptgs . length = = 0 ) {
// TODO - what is the justification for returning "#NAME" (which is not "#NAME?", btw)
return " #NAME " ;
}
Stack stack = new Stack ( ) ;
2003-08-31 02:16:57 -04:00
2002-07-14 20:18:48 -04:00
// Excel allows to have AttrPtg at position 0 (such as Blanks) which
// do not have any operands. Skip them.
2008-02-14 11:01:10 -05:00
int i ;
if ( ptgs [ 0 ] instanceof AttrPtg ) {
2008-03-04 11:53:32 -05:00
AttrPtg attrPtg0 = ( AttrPtg ) ptgs [ 0 ] ;
if ( attrPtg0 . isSemiVolatile ( ) ) {
// no visible formula for semi-volatile
} else {
// TODO -this requirement is unclear and is not addressed by any junits
stack . push ( ptgs [ 0 ] . toFormulaString ( book ) ) ;
}
i = 1 ;
2008-02-14 11:01:10 -05:00
} else {
2008-03-04 11:53:32 -05:00
i = 0 ;
2008-02-14 11:01:10 -05:00
}
2008-03-04 11:53:32 -05:00
2008-02-14 11:01:10 -05:00
for ( ; i < ptgs . length ; i + + ) {
Ptg ptg = ptgs [ i ] ;
2008-03-04 11:53:32 -05:00
// TODO - what about MemNoMemPtg?
if ( ptg instanceof MemAreaPtg | | ptg instanceof MemFuncPtg | | ptg instanceof MemErrPtg ) {
// marks the start of a list of area expressions which will be naturally combined
// by their trailing operators (e.g. UnionPtg)
// TODO - put comment and throw exception in toFormulaString() of these classes
continue ;
}
2008-02-14 11:01:10 -05:00
if ( ! ( ptg instanceof OperationPtg ) ) {
stack . push ( ptg . toFormulaString ( book ) ) ;
2003-08-31 02:16:57 -04:00
continue ;
}
2008-03-04 11:53:32 -05:00
2008-02-14 11:01:10 -05:00
if ( ptg instanceof AttrPtg & & ( ( AttrPtg ) ptg ) . isOptimizedIf ( ) ) {
2003-08-31 02:16:57 -04:00
continue ;
}
2008-03-04 11:53:32 -05:00
final OperationPtg o = ( OperationPtg ) ptg ;
int nOperands = o . getNumberOfOperands ( ) ;
final String [ ] operands = new String [ nOperands ] ;
2003-08-31 02:16:57 -04:00
2008-03-04 11:53:32 -05:00
for ( int j = nOperands - 1 ; j > = 0 ; j - - ) {
if ( stack . isEmpty ( ) ) {
//TODO: write junit to prove this works
String msg = " Too few arguments suppled to operation token ( "
+ o . getClass ( ) . getName ( ) + " ). Expected ( " + nOperands
+ " but got " + ( nOperands - j + 1 ) ;
throw new FormulaParseException ( msg ) ;
}
operands [ j ] = ( String ) stack . pop ( ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
stack . push ( o . toFormulaString ( operands ) ) ;
2002-07-14 20:18:48 -04:00
}
2008-03-04 11:53:32 -05:00
if ( stack . isEmpty ( ) ) {
// inspection of the code above reveals that every stack.pop() is followed by a
// stack.push(). So this is either an internal error or impossible.
throw new IllegalStateException ( " Stack underflow " ) ;
}
String result = ( String ) stack . pop ( ) ;
if ( ! stack . isEmpty ( ) ) {
// Might be caused by some tokens like AttrPtg and Mem*Ptg, which really shouldn't
// put anything on the stack
throw new IllegalStateException ( " too much stuff left on the stack " ) ;
}
return result ;
2003-08-31 02:16:57 -04:00
}
2007-10-15 09:48:51 -04:00
/ * *
* Static method to convert an array of Ptgs in RPN order
* to a human readable string format in infix mode . Works
* on the current workbook for named and 3D references .
* @param ptgs array of Ptg , can be null or empty
* @return a human readable String
* /
public String toFormulaString ( Ptg [ ] ptgs ) {
2008-03-04 11:53:32 -05:00
return toFormulaString ( book , ptgs ) ;
2007-10-15 09:48:51 -04:00
}
2003-08-31 02:16:57 -04:00
2003-05-06 11:30:52 -04:00
/ * * Create a tree representation of the RPN token array
* used to run the class ( RVA ) change algo
* /
2002-07-14 20:18:48 -04:00
private Node createTree ( ) {
2008-03-04 11:53:32 -05:00
Stack stack = new Stack ( ) ;
2002-07-14 20:18:48 -04:00
int numPtgs = tokens . size ( ) ;
OperationPtg o ;
int numOperands ;
Node [ ] operands ;
for ( int i = 0 ; i < numPtgs ; i + + ) {
if ( tokens . get ( i ) instanceof OperationPtg ) {
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
o = ( OperationPtg ) tokens . get ( i ) ;
numOperands = o . getNumberOfOperands ( ) ;
operands = new Node [ numOperands ] ;
for ( int j = 0 ; j < numOperands ; j + + ) {
2008-03-04 11:53:32 -05:00
operands [ numOperands - j - 1 ] = ( Node ) stack . pop ( ) ;
2002-07-14 20:18:48 -04:00
}
Node result = new Node ( o ) ;
result . setChildren ( operands ) ;
stack . push ( result ) ;
} else {
stack . push ( new Node ( ( Ptg ) tokens . get ( i ) ) ) ;
}
}
return ( Node ) stack . pop ( ) ;
}
2008-03-04 11:53:32 -05:00
2002-07-14 20:18:48 -04:00
/ * * toString on the parser instance returns the RPN ordered list of tokens
* Useful for testing
* /
public String toString ( ) {
StringBuffer buf = new StringBuffer ( ) ;
for ( int i = 0 ; i < tokens . size ( ) ; i + + ) {
2003-08-31 02:16:57 -04:00
buf . append ( ( ( Ptg ) tokens . get ( i ) ) . toFormulaString ( book ) ) ;
2002-07-14 20:18:48 -04:00
buf . append ( ' ' ) ;
2008-03-04 11:53:32 -05:00
}
2002-07-14 20:18:48 -04:00
return buf . toString ( ) ;
}
2008-03-04 11:53:32 -05:00
2003-05-06 11:30:52 -04:00
/** Private helper class, used to create a tree representation of the formula*/
2008-03-04 11:53:32 -05:00
private static final class Node {
2002-07-14 20:18:48 -04:00
private Ptg value = null ;
private Node [ ] children = new Node [ 0 ] ;
private int numChild = 0 ;
public Node ( Ptg val ) {
2008-03-04 11:53:32 -05:00
value = val ;
2002-07-14 20:18:48 -04:00
}
public void setChildren ( Node [ ] child ) { children = child ; numChild = child . length ; }
public int getNumChildren ( ) { return numChild ; }
public Node getChild ( int number ) { return children [ number ] ; }
public Ptg getValue ( ) { return value ; }
}
2008-03-04 11:53:32 -05:00
}