Bug 46951 - fixed formula parser to better handle range operators and whole row/column refs.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@762250 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2009-04-06 08:22:25 +00:00
parent 3fa069c79e
commit 527b4c7fc5
10 changed files with 1622 additions and 1073 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.5-beta6" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="fix">46951 - Fixed formula parser to better handle range operators and whole row/column refs.</action>
<action dev="POI-DEVELOPERS" type="fix">46948 - Fixed evaluation of range operator to allow for area-ref operands</action>
<action dev="POI-DEVELOPERS" type="fix">46918 - Fixed ExtendedPivotTableViewFieldsRecord(SXVDEX) to allow shorter format</action>
<action dev="POI-DEVELOPERS" type="fix">46898 - Fixed formula evaluator to not cache intermediate circular-reference error results</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5-beta6" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="fix">46951 - Fixed formula parser to better handle range operators and whole row/column refs.</action>
<action dev="POI-DEVELOPERS" type="fix">46948 - Fixed evaluation of range operator to allow for area-ref operands</action>
<action dev="POI-DEVELOPERS" type="fix">46918 - Fixed ExtendedPivotTableViewFieldsRecord(SXVDEX) to allow shorter format</action>
<action dev="POI-DEVELOPERS" type="fix">46898 - Fixed formula evaluator to not cache intermediate circular-reference error results</action>

View File

@ -17,7 +17,7 @@
package org.apache.poi.hssf.record.formula;
import org.apache.poi.hssf.util.AreaReference;
import org.apache.poi.ss.util.AreaReference;
import org.apache.poi.ss.formula.ExternSheetReferenceToken;
import org.apache.poi.ss.formula.FormulaRenderingWorkbook;
import org.apache.poi.ss.formula.WorkbookDependentFormula;

View File

@ -41,6 +41,10 @@ public final class MemAreaPtg extends OperandPtg {
field_2_subex_len = in.readShort();
}
/**
 * Accessor for the sub-expression length field of this token.
 *
 * @return the value held in <code>field_2_subex_len</code>; judging by the accessor name this is
 *         the length of the reference sub-expression - TODO confirm units against the file format
 */
public int getLenRefSubexpression() {
return field_2_subex_len;
}
public void write(LittleEndianOutput out) {
out.writeByte(sid + getPtgClass());
out.writeInt(field_1_reserved);

View File

@ -19,51 +19,17 @@ package org.apache.poi.ss.formula;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.poi.hssf.record.UnicodeString;
import org.apache.poi.hssf.record.constant.ErrorConstant;
import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
import org.apache.poi.hssf.record.formula.AddPtg;
import org.apache.poi.hssf.record.formula.Area3DPtg;
import org.apache.poi.hssf.record.formula.AreaPtg;
import org.apache.poi.hssf.record.formula.ArrayPtg;
import org.apache.poi.hssf.record.formula.AttrPtg;
import org.apache.poi.hssf.record.formula.BoolPtg;
import org.apache.poi.hssf.record.formula.ConcatPtg;
import org.apache.poi.hssf.record.formula.DividePtg;
import org.apache.poi.hssf.record.formula.EqualPtg;
import org.apache.poi.hssf.record.formula.ErrPtg;
import org.apache.poi.hssf.record.formula.FuncPtg;
import org.apache.poi.hssf.record.formula.FuncVarPtg;
import org.apache.poi.hssf.record.formula.GreaterEqualPtg;
import org.apache.poi.hssf.record.formula.GreaterThanPtg;
import org.apache.poi.hssf.record.formula.IntPtg;
import org.apache.poi.hssf.record.formula.LessEqualPtg;
import org.apache.poi.hssf.record.formula.LessThanPtg;
import org.apache.poi.hssf.record.formula.MemFuncPtg;
import org.apache.poi.hssf.record.formula.MissingArgPtg;
import org.apache.poi.hssf.record.formula.MultiplyPtg;
import org.apache.poi.hssf.record.formula.NamePtg;
import org.apache.poi.hssf.record.formula.NameXPtg;
import org.apache.poi.hssf.record.formula.NotEqualPtg;
import org.apache.poi.hssf.record.formula.NumberPtg;
import org.apache.poi.hssf.record.formula.ParenthesisPtg;
import org.apache.poi.hssf.record.formula.PercentPtg;
import org.apache.poi.hssf.record.formula.PowerPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.record.formula.RangePtg;
import org.apache.poi.hssf.record.formula.Ref3DPtg;
import org.apache.poi.hssf.record.formula.RefPtg;
import org.apache.poi.hssf.record.formula.StringPtg;
import org.apache.poi.hssf.record.formula.SubtractPtg;
import org.apache.poi.hssf.record.formula.UnaryMinusPtg;
import org.apache.poi.hssf.record.formula.UnaryPlusPtg;
import org.apache.poi.hssf.record.formula.UnionPtg;
import org.apache.poi.hssf.record.formula.*;
import org.apache.poi.hssf.record.formula.ValueOperatorPtg;
import org.apache.poi.hssf.record.formula.function.FunctionMetadata;
import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry;
import org.apache.poi.hssf.usermodel.HSSFErrorConstants;
import org.apache.poi.hssf.util.AreaReference;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.ss.util.AreaReference;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.ss.util.CellReference.NameType;
/**
@ -112,6 +78,37 @@ public final class FormulaParser {
return sb.toString();
}
}
/**
 * Immutable pair of an optional workbook name and a sheet identifier, as produced when the
 * text in front of the '!' of a sheet-qualified reference has been parsed.
 */
private static final class SheetIdentifier {
    /** workbook name, or <code>null</code> when the reference carried no '[book]' prefix */
    private final String _bookName;
    private final Identifier _sheetIdentifier;

    /**
     * @param bookName may be <code>null</code> when no workbook name was present
     * @param sheetIdentifier the (quoted or unquoted) sheet name
     */
    public SheetIdentifier(String bookName, Identifier sheetIdentifier) {
        _bookName = bookName;
        _sheetIdentifier = sheetIdentifier;
    }

    /**
     * @return the workbook name, or <code>null</code> if none was specified
     */
    public String getBookName() {
        return _bookName;
    }

    /**
     * @return the sheet name identifier
     */
    public Identifier getSheetIdentifier() {
        return _sheetIdentifier;
    }

    /**
     * @return a debug representation: the class name followed by the optional bracketed
     *         workbook name and the sheet name (wrapped in single quotes if it was quoted)
     */
    @Override
    public String toString() {
        StringBuffer sb = new StringBuffer(64);
        sb.append(getClass().getName());
        sb.append(" [");
        if (_bookName != null) {
            // the brackets hold the workbook name; the sheet name is appended separately below
            sb.append(" [").append(_bookName).append("]");
        }
        if (_sheetIdentifier.isQuoted()) {
            sb.append("'").append(_sheetIdentifier.getName()).append("'");
        } else {
            sb.append(_sheetIdentifier.getName());
        }
        sb.append("]");
        return sb.toString();
    }
}
/**
* Specific exception thrown when a supplied formula does not parse properly.<br/>
@ -129,6 +126,7 @@ public final class FormulaParser {
private final String _formulaString;
private final int _formulaLength;
/** points at the next character to be read (after the {@link #look} char) */
private int _pointer;
private ParseNode _rootNode;
@ -209,6 +207,16 @@ public final class FormulaParser {
_pointer++;
//System.out.println("Got char: "+ look);
}
/**
 * Moves the parse position to <code>ptr</code> and re-synchronises the look-ahead character.
 *
 * @param ptr the new value for the pointer (index of the next character to be read, i.e. one
 *            past the position of the look-ahead character)
 */
private void resetPointer(int ptr) {
    _pointer = ptr;
    // Beyond the end of the formula there is nothing to look at; a zero char is used so that
    // SkipWhitespace does not spin.
    boolean pastEnd = ptr > _formulaLength;
    look = pastEnd ? (char) 0 : _formulaString.charAt(ptr - 1);
}
/** Report What Was Expected */
private RuntimeException expected(String s) {
@ -235,11 +243,6 @@ public final class FormulaParser {
return Character.isDigit(c);
}
/** Recognize an Alphanumeric */
private static boolean IsAlNum(char c) {
return IsAlpha(c) || IsDigit(c);
}
/** Recognize White Space */
private static boolean IsWhite( char c) {
return c ==' ' || c== TAB;
@ -263,48 +266,6 @@ public final class FormulaParser {
}
GetChar();
}
private String parseUnquotedIdentifier() {
Identifier iden = parseIdentifier();
if (iden.isQuoted()) {
throw expected("unquoted identifier");
}
return iden.getName();
}
/**
* Parses a sheet name, named range name, or simple cell reference.<br/>
* Note - identifiers in Excel can contain dots, so this method may return a String
* which may need to be converted to an area reference. For example, this method
* may return a value like "A1..B2", in which case the caller must convert it to
* an area reference like "A1:B2"
*/
private Identifier parseIdentifier() {
StringBuffer sb = new StringBuffer();
if (!IsAlpha(look) && look != '\'' && look != '[') {
throw expected("Name");
}
boolean isQuoted = look == '\'';
if(isQuoted) {
Match('\'');
boolean done = look == '\'';
while(!done) {
sb.append(look);
GetChar();
if(look == '\'')
{
Match('\'');
done = look != '\'';
}
}
} else {
// allow for any sequence of dots and identifier chars
// special case of two consecutive dots is best treated in the calling code
while (IsAlNum(look) || look == '.' || look == '[' || look == ']' || look == '\\') {
sb.append(look);
GetChar();
}
}
return new Identifier(sb.toString(), isQuoted);
}
/** Get a Number */
private String GetNum() {
@ -317,123 +278,300 @@ public final class FormulaParser {
return value.length() == 0 ? null : value.toString();
}
private ParseNode parseFunctionReferenceOrName() {
Identifier iden = parseIdentifier();
if (look == '('){
//This is a function
return function(iden.getName());
}
if (!iden.isQuoted()) {
String name = iden.getName();
if (name.equalsIgnoreCase("TRUE") || name.equalsIgnoreCase("FALSE")) {
return new ParseNode(new BoolPtg(name.toUpperCase()));
}
}
return parseRangeExpression(iden);
}
private ParseNode parseRangeExpression(Identifier iden) {
Ptg ptgA = parseNameOrCellRef(iden);
if (look == ':') {
private ParseNode parseRangeExpression() {
ParseNode result = parseRangeable();
boolean hasRange = false;
while (look == ':') {
int pos = _pointer;
GetChar();
Identifier iden2 = parseIdentifier();
Ptg ptgB = parseNameOrCellRef(iden2);
Ptg simplified = reduceRangeExpression(ptgA, ptgB);
ParseNode nextPart = parseRangeable();
// Note - no range simplification here. An expr like "A1:B2:C3:D4:E5" should be
// grouped into area ref pairs like: "(A1:B2):(C3:D4):E5"
// Furthermore, Excel doesn't seem to simplify
// expressions like "Sheet1!A1:Sheet1:B2" into "Sheet1!A1:B2"
if (simplified == null) {
ParseNode[] children = {
new ParseNode(ptgA),
new ParseNode(ptgB),
};
ParseNode result = new ParseNode(RangePtg.instance, children);
MemFuncPtg memFuncPtg = new MemFuncPtg(result.getEncodedSize());
return new ParseNode(memFuncPtg, result);
checkValidRangeOperand("LHS", pos, result);
checkValidRangeOperand("RHS", pos, nextPart);
ParseNode[] children = { result, nextPart, };
result = new ParseNode(RangePtg.instance, children);
hasRange = true;
}
return new ParseNode(simplified);
if (hasRange) {
return augmentWithMemPtg(result);
}
return new ParseNode(ptgA);
return result;
}
/**
 * Wraps a reference sub-expression in a node whose token is either a {@link MemFuncPtg} or a
 * {@link MemAreaPtg}, constructed with the encoded size of the sub-expression.
 *
 * @param root root of the reference sub-expression
 * @return a new node with the mem token as parent and <code>root</code> as its single child
 */
private static ParseNode augmentWithMemPtg(ParseNode root) {
    if (needsMemFunc(root)) {
        return new ParseNode(new MemFuncPtg(root.getEncodedSize()), root);
    }
    return new ParseNode(new MemAreaPtg(root.getEncodedSize()), root);
}
/**
 * Decides whether a reference sub-expression has to be prefixed with a tMemFunc token.<br/>
 * From OOO doc: "Whenever one operand of the reference subexpression is a function,
 * a defined name, a 3D reference, or an external reference (and no error occurs),
 * a tMemFunc token is used"
 *
 * @param root root node of the (sub-)expression to inspect
 * @return <code>true</code> if <code>root</code> or any node reachable through operator or
 *         parenthesis nodes is a function, an extern-sheet reference or a defined name
 */
private static boolean needsMemFunc(ParseNode root) {
    Ptg token = root.getToken();
    // Note - the function check comes first, before the more general OperationPtg check below
    if (token instanceof AbstractFunctionPtg) {
        return true;
    }
    if (token instanceof ExternSheetReferenceToken) { // 3D refs
        return true;
    }
    if (token instanceof NamePtg || token instanceof NameXPtg) { // defined names
        return true;
    }
    if (token instanceof OperationPtg || token instanceof ParenthesisPtg) {
        // expect RangePtg, but perhaps also UnionPtg, IntersectionPtg etc
        for (ParseNode child : root.getChildren()) {
            if (needsMemFunc(child)) {
                return true;
            }
        }
        return false;
    }
    // plain operands (and any other remaining token type) never force a tMemFunc
    return false;
}
/**
*
* "A1", "B3" -> "A1:B3"
* "sheet1!A1", "B3" -> "sheet1!A1:B3"
*
* @return <code>null</code> if the range expression cannot / shouldn't be reduced.
* @param currentParsePosition used to format a potential error message
*/
private static Ptg reduceRangeExpression(Ptg ptgA, Ptg ptgB) {
if (!(ptgB instanceof RefPtg)) {
// only when second ref is simple 2-D ref can the range
// expression be converted to an area ref
return null;
private static void checkValidRangeOperand(String sideName, int currentParsePosition, ParseNode pn) {
if (!isValidRangeOperand(pn)) {
throw new FormulaParseException("The " + sideName
+ " of the range operator ':' at position "
+ currentParsePosition + " is not a proper reference.");
}
RefPtg refB = (RefPtg) ptgB;
if (ptgA instanceof RefPtg) {
RefPtg refA = (RefPtg) ptgA;
return new AreaPtg(refA.getRow(), refB.getRow(), refA.getColumn(), refB.getColumn(),
refA.isRowRelative(), refB.isRowRelative(), refA.isColRelative(), refB.isColRelative());
}
if (ptgA instanceof Ref3DPtg) {
Ref3DPtg refA = (Ref3DPtg) ptgA;
return new Area3DPtg(refA.getRow(), refB.getRow(), refA.getColumn(), refB.getColumn(),
refA.isRowRelative(), refB.isRowRelative(), refA.isColRelative(), refB.isColRelative(),
refA.getExternSheetIndex());
}
// Note - other operand types (like AreaPtg) which probably can't evaluate
// do not cause validation errors at parse time
return null;
}
private Ptg parseNameOrCellRef(Identifier iden) {
if (look == '!') {
GetChar();
// 3-D ref
// this code assumes iden is a sheetName
// TODO - handle <book name> ! <named range name>
int externIdx = getExternalSheetIndex(iden.getName());
String secondIden = parseUnquotedIdentifier();
if (isRowOrCol(secondIden) && look == ':') {
GetChar();
String thirdIden = parseUnquotedIdentifier();
return new Area3DPtg(secondIden + ":" + thirdIden, externIdx);
}
AreaReference areaRef = parseArea(secondIden);
if (areaRef == null) {
return new Ref3DPtg(secondIden, externIdx);
}
// will happen if dots are used instead of colon
return new Area3DPtg(areaRef.formatAsString(), externIdx);
/**
* @return <code>false</code> if the sub-expression represented by the specified ParseNode definitely
* cannot appear on either side of the range (':') operator
*/
private static boolean isValidRangeOperand(ParseNode a) {
Ptg tkn = a.getToken();
// Note - order is important for these instance-of checks
if (tkn instanceof OperandPtg) {
// notably cell refs and area refs
return true;
}
String name = iden.getName();
AreaReference areaRef = parseArea(name);
if (areaRef != null) {
// will happen if dots are used instead of colon
return new AreaPtg(areaRef.formatAsString());
// next 2 are special cases of OperationPtg
if (tkn instanceof AbstractFunctionPtg) {
AbstractFunctionPtg afp = (AbstractFunctionPtg) tkn;
byte returnClass = afp.getDefaultOperandClass();
return Ptg.CLASS_REF == returnClass;
}
// This can be either a cell ref or a named range
int nameType = CellReference.classifyCellReference(name);
if (nameType == NameType.CELL) {
return new RefPtg(name);
if (tkn instanceof ValueOperatorPtg) {
return false;
}
if (tkn instanceof OperationPtg) {
return true;
}
// one special case of ControlPtg
if (tkn instanceof ParenthesisPtg) {
// parenthesis Ptg should have only one child
return isValidRangeOperand(a.getChildren()[0]);
}
// one special case of ScalarConstantPtg
if (tkn == ErrPtg.REF_INVALID) {
return true;
}
// All other ControlPtgs and ScalarConstantPtgs cannot be used with ':'
return false;
}
/**
* Parses area refs (things which could be the operand of ':') and simple factors
* Examples
* <pre>
* A$1
* $A$1 : $B1
* A1 ....... C2
* Sheet1 !$A1
* a..b!A1
* 'my sheet'!A1
* .my.sheet!A1
* my.named..range.
* foo.bar(123.456, "abc")
* 123.456
* "abc"
* true
* </pre>
*
*/
private ParseNode parseRangeable() {
SkipWhite();
int savePointer = _pointer;
SheetIdentifier sheetIden = parseSheetName();
if (sheetIden == null) {
resetPointer(savePointer);
} else {
SkipWhite();
savePointer = _pointer;
}
SimpleRangePart part1 = parseSimpleRangePart();
if (part1 == null) {
if (sheetIden != null) {
throw new FormulaParseException("Cell reference expected after sheet name at index "
+ _pointer + ".");
}
return parseNonRange(savePointer);
}
boolean whiteAfterPart1 = IsWhite(look);
if (whiteAfterPart1) {
SkipWhite();
}
if (look == ':') {
if (nameType == NameType.COLUMN) {
int colonPos = _pointer;
GetChar();
String secondIden = parseUnquotedIdentifier();
if (CellReference.classifyCellReference(secondIden) != NameType.COLUMN) {
throw new FormulaParseException("Expected full column after '" + name
+ ":' but got '" + secondIden + "'");
SkipWhite();
SimpleRangePart part2 = parseSimpleRangePart();
if (part2 != null && !part1.isCompatibleForArea(part2)) {
// second part is not compatible with an area ref e.g. S!A1:S!B2
// where S might be a sheet name (that looks like a column name)
part2 = null;
}
return new AreaPtg(name + ":" + secondIden);
if (part2 == null) {
// second part is not compatible with an area ref e.g. A1:OFFSET(B2, 1, 2)
// reset and let caller use explicit range operator
resetPointer(colonPos);
if (!part1.isCell()) {
String prefix;
if (sheetIden == null) {
prefix = "";
} else {
prefix = "'" + sheetIden.getSheetIdentifier().getName() + '!';
}
throw new FormulaParseException(prefix + part1.getRep() + "' is not a proper reference.");
}
return createAreaRefParseNode(sheetIden, part1, part2);
}
return createAreaRefParseNode(sheetIden, part1, part2);
}
if (look == '.') {
GetChar();
int dotCount = 1;
while (look =='.') {
dotCount ++;
GetChar();
}
boolean whiteBeforePart2 = IsWhite(look);
SkipWhite();
SimpleRangePart part2 = parseSimpleRangePart();
String part1And2 = _formulaString.substring(savePointer-1, _pointer-1);
if (part2 == null) {
if (sheetIden != null) {
throw new FormulaParseException("Complete area reference expected after sheet name at index "
+ _pointer + ".");
}
return parseNonRange(savePointer);
}
if (whiteAfterPart1 || whiteBeforePart2) {
if (part1.isRowOrColumn() || part2.isRowOrColumn()) {
// "A .. B" not valid syntax for "A:B"
// and there's no other valid expression that fits this grammar
throw new FormulaParseException("Dotted range (full row or column) expression '"
+ part1And2 + "' must not contain whitespace.");
}
return createAreaRefParseNode(sheetIden, part1, part2);
}
if (dotCount == 1 && part1.isRow() && part2.isRow()) {
// actually, this is looking more like a number
return parseNonRange(savePointer);
}
if (part1.isRowOrColumn() || part2.isRowOrColumn()) {
if (dotCount != 2) {
throw new FormulaParseException("Dotted range (full row or column) expression '" + part1And2
+ "' must have exactly 2 dots.");
}
}
if (nameType != NameType.NAMED_RANGE) {
new FormulaParseException("Name '" + name
+ "' does not look like a cell reference or named range");
return createAreaRefParseNode(sheetIden, part1, part2);
}
if (part1.isCell() && isValidCellReference(part1.getRep())) {
return createAreaRefParseNode(sheetIden, part1, null);
}
if (sheetIden != null) {
throw new FormulaParseException("Second part of cell reference expected after sheet name at index "
+ _pointer + ".");
}
return parseNonRange(savePointer);
}
/**
* Parses simple factors that are not primitive ranges or range components
* i.e. '!', ':'(and equiv '...') do not appear
* Examples
* <pre>
* my.named...range.
* foo.bar(123.456, "abc")
* 123.456
* "abc"
* true
* </pre>
*/
private ParseNode parseNonRange(int savePointer) {
resetPointer(savePointer);
if (Character.isDigit(look)) {
return new ParseNode(parseNumber());
}
if (look == '"') {
return new ParseNode(new StringPtg(parseStringLiteral()));
}
// from now on we can only be dealing with non-quoted identifiers
// which will either be named ranges or functions
StringBuilder sb = new StringBuilder();
if (!Character.isLetter(look)) {
throw expected("number, string, or defined name");
}
while (isValidDefinedNameChar(look)) {
sb.append(look);
GetChar();
}
SkipWhite();
String name = sb.toString();
if (look == '(') {
return function(name);
}
if (name.equalsIgnoreCase("TRUE") || name.equalsIgnoreCase("FALSE")) {
return new ParseNode(new BoolPtg(name.toUpperCase()));
}
if (_book == null) {
// Only test cases omit the book (expecting it not to be needed)
throw new IllegalStateException("Need book to evaluate name '" + name + "'");
}
EvaluationName evalName = _book.getName(name, _sheetIndex);
if (evalName == null) {
@ -441,80 +579,300 @@ public final class FormulaParser {
+ name + "' does not exist in the current workbook.");
}
if (evalName.isRange()) {
return evalName.createPtg();
return new ParseNode(evalName.createPtg());
}
// TODO - what about NameX ?
throw new FormulaParseException("Specified name '"
+ name + "' is not a range as expected");
}
private static boolean isRowOrCol(String str) {
int i=0;
if (str.charAt(i) == '$') {
i++;
}
if (IsDigit(str.charAt(i))) {
while (i<str.length()) {
if (!IsDigit(str.charAt(i))) {
return false;
}
i++;
}
return true;
}
if (IsAlpha(str.charAt(i))) {
while (i<str.length()) {
if (!IsAlpha(str.charAt(i))) {
return false;
}
i++;
}
return true;
}
return false;
}
private int getExternalSheetIndex(String name) {
if (name.charAt(0) == '[') {
// we have a sheet name qualified with workbook name e.g. '[MyData.xls]Sheet1'
int pos = name.lastIndexOf(']'); // safe because sheet names never have ']'
String wbName = name.substring(1, pos);
String sheetName = name.substring(pos+1);
return _book.getExternalSheetIndex(wbName, sheetName);
}
return _book.getExternalSheetIndex(name);
+ name + "' is not a range as expected.");
}
/**
* @param name an 'identifier' like string (i.e. contains alphanums, and dots)
* @return <code>null</code> if name cannot be split at a dot
*
* @return <code>true</code> if the specified character may be used in a defined name
*/
private AreaReference parseArea(String name) {
int dotPos = name.indexOf('.');
if (dotPos < 0) {
private static boolean isValidDefinedNameChar(char ch) {
    // Letters and digits are always accepted. Besides those, only dot, underscore,
    // question mark and (of all things) backslash are allowed.
    return Character.isLetterOrDigit(ch)
            || ch == '.'
            || ch == '_'
            || ch == '?'
            || ch == '\\';
}
/**
 * Creates the leaf node for a cell or area reference, choosing between the 2-D tokens
 * ({@link RefPtg}, {@link AreaPtg}) and their sheet-qualified counterparts
 * ({@link Ref3DPtg}, {@link Area3DPtg}).
 *
 * @param sheetIden may be <code>null</code>, in which case a 2-D token is produced
 * @param part1 first (or only) part of the reference
 * @param part2 may be <code>null</code>, in which case a single cell reference is produced
 */
private ParseNode createAreaRefParseNode(SheetIdentifier sheetIden, SimpleRangePart part1,
        SimpleRangePart part2) throws FormulaParseException {
    final boolean sheetQualified = sheetIden != null;

    // Resolve the extern sheet index up front. The placeholder is never read for 2-D refs.
    int externSheetIx = Integer.MIN_VALUE;
    if (sheetQualified) {
        String sheetName = sheetIden.getSheetIdentifier().getName();
        String bookName = sheetIden.getBookName();
        if (bookName == null) {
            externSheetIx = _book.getExternalSheetIndex(sheetName);
        } else {
            externSheetIx = _book.getExternalSheetIndex(bookName, sheetName);
        }
    }

    Ptg refToken;
    if (part2 != null) {
        AreaReference area = createAreaRef(part1, part2);
        if (sheetQualified) {
            refToken = new Area3DPtg(area, externSheetIx);
        } else {
            refToken = new AreaPtg(area);
        }
    } else {
        CellReference cell = part1.getCellReference();
        if (sheetQualified) {
            refToken = new Ref3DPtg(cell, externSheetIx);
        } else {
            refToken = new RefPtg(cell);
        }
    }
    return new ParseNode(refToken);
}
/**
 * Combines two range parts of the same kind into an area reference: two rows give a whole-row
 * area, two columns a whole-column area, and two cells an ordinary area.
 *
 * @throws FormulaParseException if the two parts are not of the same kind
 */
private static AreaReference createAreaRef(SimpleRangePart part1, SimpleRangePart part2) {
    if (part1.isCompatibleForArea(part2)) {
        if (part1.isRow()) {
            return AreaReference.getWholeRow(part1.getRep(), part2.getRep());
        }
        if (part1.isColumn()) {
            return AreaReference.getWholeColumn(part1.getRep(), part2.getRep());
        }
        CellReference firstCell = part1.getCellReference();
        CellReference lastCell = part2.getCellReference();
        return new AreaReference(firstCell, lastCell);
    }
    throw new FormulaParseException("has incompatible parts: '"
            + part1.getRep() + "' and '" + part2.getRep() + "'.");
}
/**
* Matches zero or one letter-runs followed by zero or one digit-runs.
* Either or both runs may optionally be prefixed with a single '$'.
* (copied+modified from {@link org.apache.poi.ss.util.CellReference#CELL_REF_PATTERN})
*/
private static final Pattern CELL_REF_PATTERN = Pattern.compile("(\\$?[A-Za-z]+)?(\\$?[0-9]+)?");
/**
* Parses out a potential LHS or RHS of a ':' intended to produce a plain AreaRef. Normally these are
* proper cell references but they could also be row or column refs like "$AC" or "10"
* @return <code>null</code> (and leaves {@link #_pointer} unchanged) if a proper range part does not parse out
*/
private SimpleRangePart parseSimpleRangePart() {
int ptr = _pointer-1; // TODO avoid StringIndexOutOfBounds
boolean hasDigits = false;
boolean hasLetters = false;
while (ptr < _formulaLength) {
char ch = _formulaString.charAt(ptr);
if (Character.isDigit(ch)) {
hasDigits = true;
} else if (Character.isLetter(ch)) {
hasLetters = true;
} else if (ch =='$') {
//
} else {
break;
}
ptr++;
}
if (ptr <= _pointer-1) {
return null;
}
int dotCount = 1;
while (dotCount<name.length() && name.charAt(dotPos+dotCount) == '.') {
dotCount++;
if (dotCount>3) {
// four or more consecutive dots does not convert to ':'
String rep = _formulaString.substring(_pointer-1, ptr);
if (!CELL_REF_PATTERN.matcher(rep).matches()) {
return null;
}
}
// This expression is only valid as an area ref, if the LHS and RHS of the dot(s) are both
// cell refs. Otherwise, this expression must be a named range name
String partA = name.substring(0, dotPos);
if (!isValidCellReference(partA)) {
// Check range bounds against grid max
if (hasLetters && hasDigits) {
if (!isValidCellReference(rep)) {
return null;
}
String partB = name.substring(dotPos+dotCount);
if (!isValidCellReference(partB)) {
} else if (hasLetters) {
if (!CellReference.isColumnWithnRange(rep.replace("$", ""))) {
return null;
}
CellReference topLeft = new CellReference(partA);
CellReference bottomRight = new CellReference(partB);
return new AreaReference(topLeft, bottomRight);
} else if (hasDigits) {
int i;
try {
i = Integer.parseInt(rep.replace("$", ""));
} catch (NumberFormatException e) {
return null;
}
if (i<1 || i>65536) {
return null;
}
} else {
// just dollars ? can this happen?
return null;
}
resetPointer(ptr+1); // stepping forward
return new SimpleRangePart(rep, hasLetters, hasDigits);
}
/**
 * The textual form of one side of a simple area reference, classified as a cell, a whole row
 * or a whole column. Examples: A1, $A1, A$1, $A$1, A, 1
 */
private static final class SimpleRangePart {
    private enum Type {
        CELL, ROW, COLUMN;

        /**
         * Letters and digits together make a cell, letters alone a column, digits alone a row.
         */
        public static Type get(boolean hasLetters, boolean hasDigits) {
            if (hasLetters && hasDigits) {
                return CELL;
            }
            if (hasLetters) {
                return COLUMN;
            }
            if (hasDigits) {
                return ROW;
            }
            throw new IllegalArgumentException("must have either letters or numbers");
        }
    }

    private final Type _type;
    private final String _rep;

    /**
     * @param rep the text of this range part
     * @param hasLetters whether <code>rep</code> contains letters
     * @param hasNumbers whether <code>rep</code> contains digits
     */
    public SimpleRangePart(String rep, boolean hasLetters, boolean hasNumbers) {
        _rep = rep;
        _type = Type.get(hasLetters, hasNumbers);
    }

    public boolean isCell() {
        return Type.CELL == _type;
    }

    public boolean isRow() {
        return Type.ROW == _type;
    }

    public boolean isColumn() {
        return Type.COLUMN == _type;
    }

    public boolean isRowOrColumn() {
        return isRow() || isColumn();
    }

    /**
     * @return a cell reference built from the text of this part
     * @throws IllegalStateException if this part is a whole row or whole column
     */
    public CellReference getCellReference() {
        if (!isCell()) {
            throw new IllegalStateException("Not applicable to this type");
        }
        return new CellReference(_rep);
    }

    /**
     * @return the text of this range part, as passed to the constructor
     */
    public String getRep() {
        return _rep;
    }

    /**
     * @return <code>true</code> if the two range parts can be combined in an
     * {@link AreaPtg} ( Note - the explicit range operator (:) may still be valid
     * when this method returns <code>false</code> )
     */
    public boolean isCompatibleForArea(SimpleRangePart part2) {
        return part2._type == _type;
    }

    @Override
    public String toString() {
        return getClass().getName() + " [" + _rep + "]";
    }
}
/**
 * Attempts to read an optional '[workbook]' prefix, then a quoted or unquoted sheet name, then
 * a '!' (optionally preceded by white space).<br/>
 * Within a quoted name two consecutive single quotes contribute one single quote to the name.
 * Unquoted sheet names must start with an underscore or a letter.<br/>
 * Note - caller should reset {@link #_pointer} upon <code>null</code> result
 *
 * @return the sheet name (plus workbook name, if any), or <code>null</code> if '!' is not
 *         found in the right place
 */
private SheetIdentifier parseSheetName() {
    String bookName = null;
    if (look == '[') {
        StringBuilder book = new StringBuilder();
        // collect everything between the brackets
        for (GetChar(); look != ']'; GetChar()) {
            book.append(look);
        }
        GetChar();
        bookName = book.toString();
    }

    if (look == '\'') {
        StringBuffer quotedName = new StringBuffer();
        Match('\'');
        boolean finished = look == '\'';
        while (!finished) {
            quotedName.append(look);
            GetChar();
            if (look == '\'') {
                Match('\'');
                // another quote straight away means the quote was doubled, so carry on
                finished = look != '\'';
            }
        }
        Identifier sheetName = new Identifier(quotedName.toString(), true);
        // quoted identifier - can't concatenate anything more
        SkipWhite();
        if (look != '!') {
            return null;
        }
        GetChar();
        return new SheetIdentifier(bookName, sheetName);
    }

    // unquoted sheet names must start with underscore or a letter
    if (look != '_' && !Character.isLetter(look)) {
        return null;
    }
    StringBuilder plainName = new StringBuilder();
    // can concatenate idens with dots
    while (isUnquotedSheetNameChar(look)) {
        plainName.append(look);
        GetChar();
    }
    SkipWhite();
    if (look != '!') {
        return null;
    }
    GetChar();
    return new SheetIdentifier(bookName, new Identifier(plainName.toString(), false));
}
/**
 * Tells whether a character may appear in a sheet name that is not wrapped in quotes:
 * letters, digits, dot and underscore qualify.
 * (very similar to {@link SheetNameFormatter#isSpecialChar(char)})
 */
private static boolean isUnquotedSheetNameChar(char ch) {
    return Character.isLetterOrDigit(ch) || ch == '.' || ch == '_';
}
/**
@ -538,6 +896,10 @@ public final class FormulaParser {
// user defined function
// in the token tree, the name is more or less the first argument
if (_book == null) {
// Only test cases omit the book (expecting it not to be needed)
throw new IllegalStateException("Need book to evaluate name '" + name + "'");
}
EvaluationName hName = _book.getName(name, _sheetIndex);
if (hName == null) {
@ -729,12 +1091,14 @@ public final class FormulaParser {
Match('}');
return arrayNode;
}
if (IsAlpha(look) || look == '\'' || look == '['){
return parseFunctionReferenceOrName();
if (IsAlpha(look) || Character.isDigit(look) || look == '\'' || look == '['){
return parseRangeExpression();
}
// else - assume number
if (look == '.') {
return new ParseNode(parseNumber());
}
throw expected("cell ref or constant literal");
}
private ParseNode parseArray() {
@ -865,6 +1229,9 @@ public final class FormulaParser {
private int parseErrorLiteral() {
Match('#');
String part1 = parseUnquotedIdentifier().toUpperCase();
if (part1 == null) {
throw expected("remainder of error constant literal");
}
switch(part1.charAt(0)) {
case 'V':
@ -915,6 +1282,21 @@ public final class FormulaParser {
throw expected("#VALUE!, #REF!, #DIV/0!, #NAME?, #NUM!, #NULL! or #N/A");
}
/**
 * Reads a run of letters, digits and dots starting at the current look-ahead character.
 *
 * @return the characters read, or <code>null</code> if there were none
 */
private String parseUnquotedIdentifier() {
    if (look == '\'') {
        throw expected("unquoted identifier");
    }
    StringBuilder text = new StringBuilder();
    for (; Character.isLetterOrDigit(look) || look == '.'; GetChar()) {
        text.append(look);
    }
    return text.length() == 0 ? null : text.toString();
}
/**
* Get a PTG for an integer from its string representation.
@ -1013,8 +1395,7 @@ public final class FormulaParser {
continue;
}
if (hasUnions) {
MemFuncPtg memFuncPtg = new MemFuncPtg(result.getEncodedSize());
result = new ParseNode(memFuncPtg, result);
return augmentWithMemPtg(result);
}
return result;
}

View File

@ -21,6 +21,7 @@ import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
import org.apache.poi.hssf.record.formula.AttrPtg;
import org.apache.poi.hssf.record.formula.ControlPtg;
import org.apache.poi.hssf.record.formula.FuncVarPtg;
import org.apache.poi.hssf.record.formula.MemAreaPtg;
import org.apache.poi.hssf.record.formula.MemFuncPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.record.formula.RangePtg;
@ -112,6 +113,7 @@ final class OperandClassTransformer {
}
if (token instanceof ValueOperatorPtg || token instanceof ControlPtg
|| token instanceof MemFuncPtg
|| token instanceof MemAreaPtg
|| token instanceof UnionPtg) {
// Value Operator Ptgs and Control are base tokens, so token will be unchanged
// but any child nodes are processed according to desiredOperandClass and callerForceArrayFlag

View File

@ -20,6 +20,7 @@ package org.apache.poi.ss.formula;
import org.apache.poi.hssf.record.formula.ArrayPtg;
import org.apache.poi.hssf.record.formula.AttrPtg;
import org.apache.poi.hssf.record.formula.FuncVarPtg;
import org.apache.poi.hssf.record.formula.MemAreaPtg;
import org.apache.poi.hssf.record.formula.MemFuncPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry;
@ -39,6 +40,9 @@ final class ParseNode {
private final int _tokenCount;
public ParseNode(Ptg token, ParseNode[] children) {
if (token == null) {
throw new IllegalArgumentException("token must not be null");
}
_token = token;
_children = children;
_isIf = isIf(token);
@ -85,7 +89,7 @@ final class ParseNode {
collectIfPtgs(temp);
return;
}
boolean isPreFixOperator = _token instanceof MemFuncPtg;
boolean isPreFixOperator = _token instanceof MemFuncPtg || _token instanceof MemAreaPtg;
if (isPreFixOperator) {
temp.add(_token);
}

View File

@ -159,6 +159,14 @@ public class AreaReference {
return false;
}
/**
 * Creates an area reference spanning columns A to IV (both absolute) of the given rows.
 *
 * @param start text of the first row, appended after "$A"
 * @param end text of the last row, appended after "$IV"
 */
public static AreaReference getWholeRow(String start, String end) {
    String topLeft = "$A" + start;
    String bottomRight = "$IV" + end;
    return new AreaReference(topLeft + ":" + bottomRight);
}
/**
 * Builds an area reference from two column texts by spanning rows 1 to 65536.
 * The reference text used is <code>start + "$1:" + end + "$65536"</code>.
 *
 * @param start text of the first column, placed directly before "$1"
 * @param end text of the last column, placed directly before "$65536"
 * @return a new {@link AreaReference} created from the combined text
 */
public static AreaReference getWholeColumn(String start, String end) {
	String refText = start + "$1:" + end + "$65536";
	return new AreaReference(refText);
}
/**
* Is the reference for a whole-column reference,
* such as C:C or D:G ?

View File

@ -291,7 +291,7 @@ public class CellReference {
return true;
}
private static boolean isColumnWithnRange(String colStr) {
public static boolean isColumnWithnRange(String colStr) {
int numberOfLetters = colStr.length();
if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) {
// "Sheet1" case etc

View File

@ -23,36 +23,7 @@ import junit.framework.TestCase;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.constant.ErrorConstant;
import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
import org.apache.poi.hssf.record.formula.AddPtg;
import org.apache.poi.hssf.record.formula.Area3DPtg;
import org.apache.poi.hssf.record.formula.AreaI;
import org.apache.poi.hssf.record.formula.AreaPtg;
import org.apache.poi.hssf.record.formula.ArrayPtg;
import org.apache.poi.hssf.record.formula.AttrPtg;
import org.apache.poi.hssf.record.formula.BoolPtg;
import org.apache.poi.hssf.record.formula.ConcatPtg;
import org.apache.poi.hssf.record.formula.DividePtg;
import org.apache.poi.hssf.record.formula.EqualPtg;
import org.apache.poi.hssf.record.formula.ErrPtg;
import org.apache.poi.hssf.record.formula.FuncPtg;
import org.apache.poi.hssf.record.formula.FuncVarPtg;
import org.apache.poi.hssf.record.formula.IntPtg;
import org.apache.poi.hssf.record.formula.MemFuncPtg;
import org.apache.poi.hssf.record.formula.MissingArgPtg;
import org.apache.poi.hssf.record.formula.MultiplyPtg;
import org.apache.poi.hssf.record.formula.NamePtg;
import org.apache.poi.hssf.record.formula.NumberPtg;
import org.apache.poi.hssf.record.formula.PercentPtg;
import org.apache.poi.hssf.record.formula.PowerPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.record.formula.RangePtg;
import org.apache.poi.hssf.record.formula.Ref3DPtg;
import org.apache.poi.hssf.record.formula.RefPtg;
import org.apache.poi.hssf.record.formula.StringPtg;
import org.apache.poi.hssf.record.formula.SubtractPtg;
import org.apache.poi.hssf.record.formula.UnaryMinusPtg;
import org.apache.poi.hssf.record.formula.UnaryPlusPtg;
import org.apache.poi.hssf.record.formula.UnionPtg;
import org.apache.poi.hssf.record.formula.*;
import org.apache.poi.hssf.usermodel.FormulaExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFErrorConstants;
@ -63,6 +34,7 @@ import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.formula.FormulaParser;
import org.apache.poi.ss.formula.FormulaParserTestHelper;
import org.apache.poi.ss.usermodel.BaseTestBugzillaIssues;
/**
* Test the low level formula parser functionality. High level tests are to
@ -1017,4 +989,180 @@ public final class TestFormulaParser extends TestCase {
assertEquals("NOT(POI\\2009=\"3.5-final\")", cell_D1.getCellFormula());
}
/**
 * Parses formulas in which both ends of a range carry a sheet name and fails
 * if a {@link StringIndexOutOfBoundsException} is thrown (bug 42448).
 * TODO - delete equiv test:
 * {@link BaseTestBugzillaIssues#test42448()}
 */
public void testParseAbnormalSheetNamesAndRanges_bug42448() {
	HSSFWorkbook workbook = new HSSFWorkbook();
	workbook.createSheet("A");

	String simpleFormula = "SUM(A!C7:A!C67)";
	try {
		HSSFFormulaParser.parse(simpleFormula, workbook);
	} catch (StringIndexOutOfBoundsException e) {
		throw new AssertionFailedError("Identified bug 42448");
	}

	// the exact example from the bugzilla description:
	String bugzillaFormula = "SUMPRODUCT(A!C7:A!C67, B8:B68) / B69";
	HSSFFormulaParser.parse(bugzillaFormula, workbook);
}
/**
 * Checks that a function call is accepted as the right-hand operand of the
 * range operator ':' (bug 46951) and that the expected token sequence results.
 */
public void testRangeFuncOperand_bug46951() {
	HSSFWorkbook wb = new HSSFWorkbook();
	Ptg[] ptgs;
	try {
		ptgs = HSSFFormulaParser.parse("SUM(C1:OFFSET(C1,0,B1))", wb);
	} catch (RuntimeException e) {
		// Constant-first comparison: getMessage() may return null, and an NPE here
		// would hide the original exception instead of re-throwing it below.
		if ("Specified named range 'OFFSET' does not exist in the current workbook.".equals(e.getMessage())) {
			throw new AssertionFailedError("Identified bug 46951");
		}
		throw e;
	}
	confirmTokenClasses(ptgs, new Class[] {
		MemFuncPtg.class, // [len=23]
		RefPtg.class, // [C1]
		RefPtg.class, // [C1]
		IntPtg.class, // [0]
		RefPtg.class, // [B1]
		FuncVarPtg.class, // [OFFSET nArgs=3]
		RangePtg.class, //
		AttrPtg.class, // [sum ]
	});
}
/**
 * Checks parsing of whole-row and whole-column references, alone and combined
 * with the union operator ',', both with and without sheet names.
 */
public void testUnionOfFullCollFullRowRef() {
	Ptg[] ptgs;
	// Every parse result is verified; previously the "3:4" and "B:B" results were
	// overwritten by the next parse without being checked.
	ptgs = parseFormula("3:4");
	confirmTokenClasses(ptgs, new Class[] {
		AreaPtg.class,
	});
	ptgs = parseFormula("$Z:$AC");
	confirmTokenClasses(ptgs, new Class[] {
		AreaPtg.class,
	});
	ptgs = parseFormula("B:B");
	confirmTokenClasses(ptgs, new Class[] {
		AreaPtg.class,
	});
	ptgs = parseFormula("$11:$13");
	confirmTokenClasses(ptgs, new Class[] {
		AreaPtg.class,
	});

	ptgs = parseFormula("$A:$A,$1:$4");
	confirmTokenClasses(ptgs, new Class[] {
		MemAreaPtg.class,
		AreaPtg.class,
		AreaPtg.class,
		UnionPtg.class,
	});

	HSSFWorkbook wb = new HSSFWorkbook();
	wb.createSheet("Sheet1");
	ptgs = HSSFFormulaParser.parse("Sheet1!$A:$A,Sheet1!$1:$4", wb);
	confirmTokenClasses(ptgs, new Class[] {
		MemFuncPtg.class,
		Area3DPtg.class,
		Area3DPtg.class,
		UnionPtg.class,
	});

	// same formula with the sheet names quoted
	ptgs = HSSFFormulaParser.parse("'Sheet1'!$A:$A,'Sheet1'!$1:$4", wb);
	confirmTokenClasses(ptgs, new Class[] {
		MemFuncPtg.class,
		Area3DPtg.class,
		Area3DPtg.class,
		UnionPtg.class,
	});
}
/**
 * Parses a range whose two operands each carry a sheet name and checks both
 * the resulting token classes and the length recorded in the leading
 * {@link MemFuncPtg}.
 */
public void testExplicitRangeWithTwoSheetNames() {
	HSSFWorkbook workbook = new HSSFWorkbook();
	workbook.createSheet("Sheet1");

	Ptg[] tokens = HSSFFormulaParser.parse("Sheet1!F1:Sheet1!G2", workbook);
	Class[] expectedClasses = {
		MemFuncPtg.class,
		Ref3DPtg.class,
		Ref3DPtg.class,
		RangePtg.class,
	};
	confirmTokenClasses(tokens, expectedClasses);

	MemFuncPtg memFunc = (MemFuncPtg) tokens[0];
	assertEquals(15, memFunc.getLenRefSubexpression());
}
/**
 * Checks that the area-ref and explicit range operators get the right associativity
 * and that the {@link MemFuncPtg} / {@link MemAreaPtg} is added correctly.
 * Two formulas are parsed: one mixing function calls with ranges, and one made
 * only of cell references.
 */
public void testComplexExplicitRangeEncodings() {
	// Case 1: range operands include function calls
	Ptg[] mixedTokens = parseFormula("SUM(OFFSET(A1,0,0):B2:C3:D4:E5:OFFSET(F6,1,1):G7)");
	Class[] mixedExpected = {
		// AttrPtg.class, // [volatile ] // POI doesn't do this yet (Apr 2009)
		MemFuncPtg.class, // len 57
		RefPtg.class, // [A1]
		IntPtg.class, // [0]
		IntPtg.class, // [0]
		FuncVarPtg.class, // [OFFSET nArgs=3]
		AreaPtg.class, // [B2:C3]
		RangePtg.class,
		AreaPtg.class, // [D4:E5]
		RangePtg.class,
		RefPtg.class, // [F6]
		IntPtg.class, // [1]
		IntPtg.class, // [1]
		FuncVarPtg.class, // [OFFSET nArgs=3]
		RangePtg.class,
		RefPtg.class, // [G7]
		RangePtg.class,
		AttrPtg.class, // [sum ]
	};
	confirmTokenClasses(mixedTokens, mixedExpected);

	MemFuncPtg memFunc = (MemFuncPtg) mixedTokens[0];
	assertEquals(57, memFunc.getLenRefSubexpression());
	AreaPtgBase secondArea = (AreaPtgBase) mixedTokens[7];
	assertEquals("D4:E5", secondArea.toFormulaString());
	AttrPtg sumAttr = (AttrPtg) mixedTokens[16];
	assertTrue(sumAttr.isSum());

	// Case 2: range operands are plain cell references only
	Ptg[] plainTokens = parseFormula("SUM(A1:B2:C3:D4)");
	Class[] plainExpected = {
		// AttrPtg.class, // [volatile ] // POI doesn't do this yet (Apr 2009)
		MemAreaPtg.class, // len 19
		AreaPtg.class, // [A1:B2]
		AreaPtg.class, // [C3:D4]
		RangePtg.class,
		AttrPtg.class, // [sum ]
	};
	confirmTokenClasses(plainTokens, plainExpected);

	MemAreaPtg memArea = (MemAreaPtg) plainTokens[0];
	assertEquals(19, memArea.getLenRefSubexpression());
}
/**
 * Mostly confirming that erroneous conditions are detected. Actual error message wording is not critical.
 * Each entry pairs a malformed formula with the parse error message expected for it.
 */
public void testEdgeCaseParserErrors() {
	HSSFWorkbook workbook = new HSSFWorkbook();
	workbook.createSheet("Sheet1");

	// { formula, expected error message }, checked in the order listed
	String[][] errorCases = {
		{ "A1:ROUND(B1,1)", "The RHS of the range operator ':' at position 3 is not a proper reference." },
		{ "Sheet1!Sheet1", "Cell reference expected after sheet name at index 8." },
		{ "Sheet1!F:Sheet1!G", "'Sheet1!F' is not a proper reference." },
		{ "Sheet1!F..foobar", "Complete area reference expected after sheet name at index 11." },
		{ "Sheet1!A .. B", "Dotted range (full row or column) expression 'A .. B' must not contain whitespace." },
		{ "Sheet1!A...B", "Dotted range (full row or column) expression 'A...B' must have exactly 2 dots." },
		{ "Sheet1!A foobar", "Second part of cell reference expected after sheet name at index 10." },
		{ "foobar", "Specified named range 'foobar' does not exist in the current workbook." },
		{ "A1:1", "The RHS of the range operator ':' at position 3 is not a proper reference." },
	};
	for (int i = 0; i < errorCases.length; i++) {
		String formula = errorCases[i][0];
		String expectedMessage = errorCases[i][1];
		confirmParseError(workbook, formula, expectedMessage);
	}
}
/**
 * Parses <code>formula</code> against <code>wb</code> and requires the parse to fail
 * with a {@link RuntimeException}, which is handed to
 * {@link FormulaParserTestHelper#confirmParseException} together with the
 * expected message.
 *
 * @param wb workbook passed to the parser
 * @param formula formula text that is expected to be rejected
 * @param expectedMessage message passed on for comparison with the caught exception
 * @throws AssertionFailedError if parsing completes without throwing
 */
private static void confirmParseError(HSSFWorkbook wb, String formula, String expectedMessage) {
	try {
		HSSFFormulaParser.parse(formula, wb);
	} catch (RuntimeException e) {
		FormulaParserTestHelper.confirmParseException(e, expectedMessage);
		return;
	}
	// Raised outside the try block so the failure can never be confused with a
	// parser exception; the message spelling ("exception") is also corrected.
	throw new AssertionFailedError("Expected formula parse exception");
}
}