Fix formula parser to properly support the range operator. Small fixes to parsing of sheet names and full column references.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@699487 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-09-26 20:25:45 +00:00
parent da8c3b0dab
commit b1f74040ad
10 changed files with 548 additions and 346 deletions

View File

@ -76,14 +76,30 @@ public abstract class AreaPtgBase extends OperandPtg implements AreaI {
checkColumnBounds(lastColumn);
checkRowBounds(firstRow);
checkRowBounds(lastRow);
if (lastRow > firstRow) {
setFirstRow(firstRow);
setLastRow(lastRow);
setFirstColumn(firstColumn);
setLastColumn(lastColumn);
setFirstRowRelative(firstRowRelative);
setLastRowRelative(lastRowRelative);
} else {
setFirstRow(lastRow);
setLastRow(firstRow);
setFirstRowRelative(lastRowRelative);
setLastRowRelative(firstRowRelative);
}
if (lastColumn > firstColumn) {
setFirstColumn(firstColumn);
setLastColumn(lastColumn);
setFirstColRelative(firstColRelative);
setLastColRelative(lastColRelative);
} else {
setFirstColumn(lastColumn);
setLastColumn(firstColumn);
setFirstColRelative(lastColRelative);
setLastColRelative(firstColRelative);
}
}
private static void checkColumnBounds(int colIx) {

View File

@ -101,13 +101,27 @@ public final class SheetNameFormatter {
return true;
}
}
if (nameLooksLikeBooleanLiteral(rawSheetName)) {
return true;
}
// Error constant literals all contain '#' and other special characters
// so they don't get this far
return false;
}
private static boolean nameLooksLikeBooleanLiteral(String rawSheetName) {
switch(rawSheetName.charAt(0)) {
case 'T': case 't':
return "TRUE".equalsIgnoreCase(rawSheetName);
case 'F': case 'f':
return "FALSE".equalsIgnoreCase(rawSheetName);
}
return false;
}
/**
* @return <code>true</code> if the presence of the specified character in a sheet name would
* require the sheet name to be delimited in formulas. This includes every non-alphanumeric
* character besides underscore '_'.
* character besides underscore '_' and dot '.'.
*/
/* package */ static boolean isSpecialChar(char ch) {
// note - Character.isJavaIdentifierPart() would allow dollars '$'
@ -115,7 +129,8 @@ public final class SheetNameFormatter {
return false;
}
switch(ch) {
case '_': // underscore is ok
case '.': // dot is OK
case '_': // underscore is OK
return false;
case '\n':
case '\r':

View File

@ -101,10 +101,48 @@ public final class AreaReference {
/**
* Creates an area ref from a pair of Cell References.
* Also normalises such that the top-left
*/
public AreaReference(CellReference topLeft, CellReference botRight) {
boolean swapRows = topLeft.getRow() > botRight.getRow();
boolean swapCols = topLeft.getCol() > botRight.getCol();
if (swapRows || swapCols) {
int firstRow;
int lastRow;
int firstColumn;
int lastColumn;
boolean firstRowAbs;
boolean lastRowAbs;
boolean firstColAbs;
boolean lastColAbs;
if (swapRows) {
firstRow = botRight.getRow();
firstRowAbs = botRight.isRowAbsolute();
lastRow = topLeft.getRow();
lastRowAbs = topLeft.isRowAbsolute();
} else {
firstRow = topLeft.getRow();
firstRowAbs = topLeft.isRowAbsolute();
lastRow = botRight.getRow();
lastRowAbs = botRight.isRowAbsolute();
}
if (swapCols) {
firstColumn = botRight.getCol();
firstColAbs = botRight.isColAbsolute();
lastColumn = topLeft.getCol();
lastColAbs = topLeft.isColAbsolute();
} else {
firstColumn = topLeft.getCol();
firstColAbs = topLeft.isColAbsolute();
lastColumn = botRight.getCol();
lastColAbs = botRight.isColAbsolute();
}
_firstCell = new CellReference(firstRow, firstColumn, firstRowAbs, firstColAbs);
_lastCell = new CellReference(lastRow, lastColumn, lastRowAbs, lastColAbs);
} else {
_firstCell = topLeft;
_lastCell = botRight;
}
_isSingleCell = false;
}

View File

@ -34,6 +34,7 @@ public final class CellReference {
public static final class NameType {
public static final int CELL = 1;
public static final int NAMED_RANGE = 2;
public static final int COLUMN = 3;
public static final int BAD_CELL_OR_NAMED_RANGE = -1;
}
/** The character ($) that signifies a row or column value is absolute instead of relative */
@ -44,10 +45,16 @@ public final class CellReference {
private static final char SPECIAL_NAME_DELIMITER = '\'';
/**
* Matches a run of letters followed by a run of digits. The run of letters is group 1 and the
* run of digits is group 2. Each group may optionally be prefixed with a single '$'.
* Matches a run of one or more letters followed by a run of one or more digits.
* The run of letters is group 1 and the run of digits is group 2.
* Each group may optionally be prefixed with a single '$'.
*/
private static final Pattern CELL_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)\\$?([0-9]+)");
/**
* Matches a run of one or more letters. The run of letters is group 1.
* The text may optionally be prefixed with a single '$'.
*/
private static final Pattern COLUMN_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)");
/**
* Named range names must start with a letter or underscore. Subsequent characters may include
* digits or dot. (They can even end in dot).
@ -203,7 +210,7 @@ public final class CellReference {
// named range name
// This behaviour is a little weird. For example, "IW123" is a valid named range name
// because the column "IW" is beyond the maximum "IV". Note - this behaviour is version
// dependent. In Excel 2007, "IW123" is not a valid named range name.
// dependent. In BIFF12, "IW123" is not a valid named range name, but in BIFF8 it is.
if (str.indexOf(ABSOLUTE_REFERENCE_MARKER) >= 0) {
// Of course, named range names cannot have '$'
return NameType.BAD_CELL_OR_NAMED_RANGE;
@ -212,11 +219,17 @@ public final class CellReference {
}
private static int validateNamedRangeName(String str) {
Matcher colMatcher = COLUMN_REF_PATTERN.matcher(str);
if (colMatcher.matches()) {
String colStr = colMatcher.group(1);
if (isColumnWithnRange(colStr)) {
return NameType.COLUMN;
}
}
if (!NAMED_RANGE_NAME_PATTERN.matcher(str).matches()) {
return NameType.BAD_CELL_OR_NAMED_RANGE;
}
return NameType.NAMED_RANGE;
}
@ -257,23 +270,13 @@ public final class CellReference {
* @return <code>true</code> if the row and col parameters are within range of a BIFF8 spreadsheet.
*/
public static boolean cellReferenceIsWithinRange(String colStr, String rowStr) {
int numberOfLetters = colStr.length();
if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) {
// "Sheet1" case etc
return false; // that was easy
if (!isColumnWithnRange(colStr)) {
return false;
}
int nDigits = rowStr.length();
if(nDigits > BIFF8_LAST_ROW_TEXT_LEN) {
return false;
}
if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) {
if(colStr.toUpperCase().compareTo(BIFF8_LAST_COLUMN) > 0) {
return false;
}
} else {
// apparent column name has less chars than max
// no need to check range
}
if(nDigits == BIFF8_LAST_ROW_TEXT_LEN) {
// ASCII comparison is valid if digit count is same
@ -288,6 +291,23 @@ public final class CellReference {
return true;
}
private static boolean isColumnWithnRange(String colStr) {
int numberOfLetters = colStr.length();
if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) {
// "Sheet1" case etc
return false; // that was easy
}
if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) {
if(colStr.toUpperCase().compareTo(BIFF8_LAST_COLUMN) > 0) {
return false;
}
} else {
// apparent column name has less chars than max
// no need to check range
}
return true;
}
/**
* Separates the row from the columns and returns an array of three Strings. The first element
* is the sheet name. Only the first element may be null. The second element in is the column

View File

@ -49,6 +49,7 @@ import org.apache.poi.hssf.record.formula.ParenthesisPtg;
import org.apache.poi.hssf.record.formula.PercentPtg;
import org.apache.poi.hssf.record.formula.PowerPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.record.formula.RangePtg;
import org.apache.poi.hssf.record.formula.Ref3DPtg;
import org.apache.poi.hssf.record.formula.RefPtg;
import org.apache.poi.hssf.record.formula.StringPtg;
@ -81,6 +82,33 @@ import org.apache.poi.hssf.util.CellReference.NameType;
* @author Josh Micich
*/
public final class FormulaParser {
private static final class Identifier {
private final String _name;
private final boolean _isQuoted;
public Identifier(String name, boolean isQuoted) {
_name = name;
_isQuoted = isQuoted;
}
public String getName() {
return _name;
}
public boolean isQuoted() {
return _isQuoted;
}
public String toString() {
StringBuffer sb = new StringBuffer(64);
sb.append(getClass().getName());
sb.append(" [");
if (_isQuoted) {
sb.append("'").append(_name).append("'");
} else {
sb.append(_name);
}
sb.append("]");
return sb.toString();
}
}
/**
* Specific exception thrown when a supplied formula does not parse properly.<br/>
@ -176,23 +204,23 @@ public final class FormulaParser {
}
/** Recognize an Alpha Character */
private boolean IsAlpha(char c) {
private static boolean IsAlpha(char c) {
return Character.isLetter(c) || c == '$' || c=='_';
}
/** Recognize a Decimal Digit */
private boolean IsDigit(char c) {
private static boolean IsDigit(char c) {
return Character.isDigit(c);
}
/** Recognize an Alphanumeric */
private boolean IsAlNum(char c) {
return (IsAlpha(c) || IsDigit(c));
private static boolean IsAlNum(char c) {
return IsAlpha(c) || IsDigit(c);
}
/** Recognize White Space */
private boolean IsWhite( char c) {
return (c ==' ' || c== TAB);
private static boolean IsWhite( char c) {
return c ==' ' || c== TAB;
}
/** Skip Over Leading White Space */
@ -213,7 +241,13 @@ public final class FormulaParser {
}
GetChar();
}
private String parseUnquotedIdentifier() {
Identifier iden = parseIdentifier();
if (iden.isQuoted()) {
throw expected("unquoted identifier");
}
return iden.getName();
}
/**
* Parses a sheet name, named range name, or simple cell reference.<br/>
* Note - identifiers in Excel can contain dots, so this method may return a String
@ -221,18 +255,17 @@ public final class FormulaParser {
* may return a value like "A1..B2", in which case the caller must convert it to
* an area reference like "A1:B2"
*/
private String parseIdentifier() {
StringBuffer Token = new StringBuffer();
private Identifier parseIdentifier() {
StringBuffer sb = new StringBuffer();
if (!IsAlpha(look) && look != '\'') {
throw expected("Name");
}
if(look == '\'')
{
boolean isQuoted = look == '\'';
if(isQuoted) {
Match('\'');
boolean done = look == '\'';
while(!done)
{
Token.append(look);
while(!done) {
sb.append(look);
GetChar();
if(look == '\'')
{
@ -240,17 +273,15 @@ public final class FormulaParser {
done = look != '\'';
}
}
}
else
{
} else {
// allow for any sequence of dots and identifier chars
// special case of two consecutive dots is best treated in the calling code
while (IsAlNum(look) || look == '.') {
Token.append(look);
sb.append(look);
GetChar();
}
}
return Token.toString();
return new Identifier(sb.toString(), isQuoted);
}
/** Get a Number */
@ -265,72 +296,112 @@ public final class FormulaParser {
}
private ParseNode parseFunctionReferenceOrName() {
String name = parseIdentifier();
Identifier iden = parseIdentifier();
if (look == '('){
//This is a function
return function(name);
return function(iden.getName());
}
return new ParseNode(parseNameOrReference(name));
if (!iden.isQuoted()) {
String name = iden.getName();
if (name.equalsIgnoreCase("TRUE") || name.equalsIgnoreCase("FALSE")) {
return new ParseNode(new BoolPtg(name.toUpperCase()));
}
}
return parseRangeExpression(iden);
}
private Ptg parseNameOrReference(String name) {
private ParseNode parseRangeExpression(Identifier iden) {
Ptg ptgA = parseNameOrCellRef(iden);
if (look == ':') {
GetChar();
Identifier iden2 = parseIdentifier();
Ptg ptgB = parseNameOrCellRef(iden2);
Ptg simplified = reduceRangeExpression(ptgA, ptgB);
if (simplified == null) {
ParseNode[] children = {
new ParseNode(ptgA),
new ParseNode(ptgB),
};
return new ParseNode(RangePtg.instance, children);
}
return new ParseNode(simplified);
}
return new ParseNode(ptgA);
}
/**
*
* "A1", "B3" -> "A1:B3"
* "sheet1!A1", "B3" -> "sheet1!A1:B3"
*
* @return <code>null</code> if the range expression cannot / shouldn't be reduced.
*/
private static Ptg reduceRangeExpression(Ptg ptgA, Ptg ptgB) {
if (!(ptgB instanceof RefPtg)) {
// only when second ref is simple 2-D ref can the range
// expression be converted to an area ref
return null;
}
RefPtg refB = (RefPtg) ptgB;
if (ptgA instanceof RefPtg) {
RefPtg refA = (RefPtg) ptgA;
return new AreaPtg(refA.getRow(), refB.getRow(), refA.getColumn(), refB.getColumn(),
refA.isRowRelative(), refB.isRowRelative(), refA.isColRelative(), refB.isColRelative());
}
if (ptgA instanceof Ref3DPtg) {
Ref3DPtg refA = (Ref3DPtg) ptgA;
return new Area3DPtg(refA.getRow(), refB.getRow(), refA.getColumn(), refB.getColumn(),
refA.isRowRelative(), refB.isRowRelative(), refA.isColRelative(), refB.isColRelative(),
refA.getExternSheetIndex());
}
// Note - other operand types (like AreaPtg) which probably can't evaluate
// do not cause validation errors at parse time
return null;
}
private Ptg parseNameOrCellRef(Identifier iden) {
if (look == '!') {
GetChar();
// 3-D ref
// this code assumes iden is a sheetName
// TODO - handle <book name> ! <named range name>
int externIdx = book.getExternalSheetIndex(iden.getName());
String secondIden = parseUnquotedIdentifier();
AreaReference areaRef = parseArea(secondIden);
if (areaRef == null) {
return new Ref3DPtg(secondIden, externIdx);
}
// will happen if dots are used instead of colon
return new Area3DPtg(areaRef.formatAsString(), externIdx);
}
String name = iden.getName();
AreaReference areaRef = parseArea(name);
if (areaRef != null) {
// will happen if dots are used instead of colon
return new AreaPtg(areaRef.formatAsString());
}
if (look == ':' || look == '.') { // this is a AreaReference
GetChar();
while (look == '.') { // formulas can have . or .. or ... instead of :
GetChar();
}
String first = name;
String second = parseIdentifier();
return new AreaPtg(first+":"+second);
}
if (look == '!') {
Match('!');
String sheetName = name;
String first = parseIdentifier();
int externIdx = book.getExternalSheetIndex(sheetName);
areaRef = parseArea(name);
if (areaRef != null) {
// will happen if dots are used instead of colon
return new Area3DPtg(areaRef.formatAsString(), externIdx);
}
if (look == ':') {
Match(':');
String second=parseIdentifier();
if (look == '!') {
//The sheet name was included in both of the areas. Only really
//need it once
Match('!');
String third=parseIdentifier();
if (!sheetName.equals(second))
throw new RuntimeException("Unhandled double sheet reference.");
return new Area3DPtg(first+":"+third,externIdx);
}
return new Area3DPtg(first+":"+second,externIdx);
}
return new Ref3DPtg(first, externIdx);
}
if (name.equalsIgnoreCase("TRUE") || name.equalsIgnoreCase("FALSE")) {
return new BoolPtg(name.toUpperCase());
}
// This can be either a cell ref or a named range
// Try to spot which it is
int nameType = CellReference.classifyCellReference(name);
if (nameType == NameType.CELL) {
return new RefPtg(name);
}
if (look == ':') {
if (nameType == NameType.COLUMN) {
GetChar();
String secondIden = parseUnquotedIdentifier();
if (CellReference.classifyCellReference(secondIden) != NameType.COLUMN) {
throw new FormulaParseException("Expected full column after '" + name
+ ":' but got '" + secondIden + "'");
}
return new AreaPtg(name + ":" + secondIden);
}
}
if (nameType != NameType.NAMED_RANGE) {
new FormulaParseException("Name '" + name
+ "' does not look like a cell reference or named range");
@ -662,7 +733,7 @@ public final class FormulaParser {
}
private Boolean parseBooleanLiteral() {
String iden = parseIdentifier();
String iden = parseUnquotedIdentifier();
if ("TRUE".equalsIgnoreCase(iden)) {
return Boolean.TRUE;
}
@ -720,7 +791,7 @@ public final class FormulaParser {
private int parseErrorLiteral() {
Match('#');
String part1 = parseIdentifier().toUpperCase();
String part1 = parseUnquotedIdentifier().toUpperCase();
switch(part1.charAt(0)) {
case 'V':

View File

@ -19,6 +19,7 @@ package org.apache.poi.ss.formula;
import org.apache.poi.hssf.record.formula.AbstractFunctionPtg;
import org.apache.poi.hssf.record.formula.ControlPtg;
import org.apache.poi.hssf.record.formula.RangePtg;
import org.apache.poi.hssf.record.formula.ValueOperatorPtg;
import org.apache.poi.hssf.record.formula.Ptg;
@ -115,6 +116,10 @@ final class OperandClassTransformer {
return;
}
if (children.length > 0) {
if (token == RangePtg.instance) {
// TODO is any token transformation required under the various ref operators?
return;
}
throw new IllegalStateException("Node should not have any children");
}

View File

@ -880,6 +880,37 @@ public final class TestFormulaParser extends TestCase {
Object[][] values = aptg.getTokenArrayValues();
assertEquals(ErrorConstant.valueOf(HSSFErrorConstants.ERROR_REF), values[0][3]);
assertEquals(Boolean.FALSE, values[1][0]);
}
public void testRangeOperator() {
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet();
HSSFCell cell = sheet.createRow(0).createCell(0);
wb.setSheetName(0, "Sheet1");
cell.setCellFormula("Sheet1!B$4:Sheet1!$C1"); // explicit range ':' operator
assertEquals("Sheet1!B$4:Sheet1!$C1", cell.getCellFormula());
cell.setCellFormula("Sheet1!B$4:$C1"); // plain area ref
assertEquals("Sheet1!B1:$C$4", cell.getCellFormula()); // note - area ref is normalised
cell.setCellFormula("Sheet1!$C1...B$4"); // different syntax for plain area ref
assertEquals("Sheet1!B1:$C$4", cell.getCellFormula());
// with funny sheet name
wb.setSheetName(0, "A1...A2");
cell.setCellFormula("A1...A2!B1");
assertEquals("A1...A2!B1", cell.getCellFormula());
}
public void testBooleanNamedSheet() {
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet("true");
HSSFCell cell = sheet.createRow(0).createCell(0);
cell.setCellFormula("'true'!B2");
assertEquals("'true'!B2", cell.getCellFormula());
}
}

View File

@ -20,16 +20,12 @@ package org.apache.poi.hssf.record.formula;
import junit.framework.TestCase;
/**
* Tests for SheetNameFormatter
* Tests for {@link SheetNameFormatter}
*
* @author Josh Micich
*/
public final class TestSheetNameFormatter extends TestCase {
public TestSheetNameFormatter(String testName) {
super(testName);
}
private static void confirmFormat(String rawSheetName, String expectedSheetNameEncoding) {
assertEquals(expectedSheetNameEncoding, SheetNameFormatter.format(rawSheetName));
}
@ -55,6 +51,16 @@ public final class TestSheetNameFormatter extends TestCase {
confirmFormat("TAXRETURN19980415", "TAXRETURN19980415");
}
public void testBooleanLiterals() {
confirmFormat("TRUE", "'TRUE'");
confirmFormat("FALSE", "'FALSE'");
confirmFormat("True", "'True'");
confirmFormat("fAlse", "'fAlse'");
confirmFormat("Yes", "Yes");
confirmFormat("No", "No");
}
private static void confirmCellNameMatch(String rawSheetName, boolean expected) {
assertEquals(expected, SheetNameFormatter.nameLooksLikePlainCellReference(rawSheetName));
}

View File

@ -146,7 +146,7 @@ public final class TestFormulaBugs extends TestCase {
throw new AssertionFailedError("Identified bug 42448");
}
assertEquals("SUMPRODUCT(A!C7:C67,B8:B68)/B69", cell.getCellFormula());
assertEquals("SUMPRODUCT(A!C7:A!C67,B8:B68)/B69", cell.getCellFormula());
// might as well evaluate the sucker...

View File

@ -1064,7 +1064,7 @@ public final class TestFormulas extends TestCase {
/** Unknown Ptg 3D*/
public void test27272_2() throws Exception {
HSSFWorkbook wb = openSample("27272_2.xls");
assertEquals("Reference for named range ", "'LOAD.POD_HISTORIES'!#REF!",wb.getNameAt(0).getReference());
assertEquals("LOAD.POD_HISTORIES!#REF!", wb.getNameAt(0).getReference());
File outF = File.createTempFile("bug27272_2",".xls");
wb.write(new FileOutputStream(outF));
System.out.println("Open "+outF.getAbsolutePath()+" in Excel");