Fix for bug 45354 - Proper distinguishing of cell references and named ranges within formulas

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@674953 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-07-08 21:00:13 +00:00
parent d9c09dd5ea
commit 718befdc6a
8 changed files with 260 additions and 103 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45354 - Fixed recognition of named ranges within formulas</action>
<action dev="POI-DEVELOPERS" type="fix">45338 - Fix HSSFWorkbook to give you the same HSSFFont every time, and then fix it to find newly added fonts</action>
<action dev="POI-DEVELOPERS" type="fix">45336 - Fix HSSFColor.getTripletHash()</action>
<action dev="POI-DEVELOPERS" type="fix">45334 - Fixed formula parser to handle dots in identifiers</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.1.1-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45354 - Fixed recognition of named ranges within formulas</action>
<action dev="POI-DEVELOPERS" type="fix">45338 - Fix HSSFWorkbook to give you the same HSSFFont every time, and then fix it to find newly added fonts</action>
<action dev="POI-DEVELOPERS" type="fix">45336 - Fix HSSFColor.getTripletHash()</action>
<action dev="POI-DEVELOPERS" type="fix">45334 - Fixed formula parser to handle dots in identifiers</action>

View File

@ -29,6 +29,7 @@ import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.AreaReference;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.hssf.util.CellReference.NameType;
/**
* This class parses a formula string into a List of tokens in RPN order.
@ -293,9 +294,14 @@ public final class FormulaParser {
// This can be either a cell ref or a named range
// Try to spot which it is
if (isValidCellReference(name)) {
int nameType = CellReference.classifyCellReference(name);
if (nameType == NameType.CELL) {
return new RefPtg(name);
}
if (nameType != NameType.NAMED_RANGE) {
new FormulaParseException("Name '" + name
+ "' does not look like a cell reference or named range");
}
for(int i = 0; i < book.getNumberOfNames(); i++) {
// named range name matching is case insensitive
@ -303,11 +309,12 @@ public final class FormulaParser {
return new NamePtg(name, book);
}
}
throw new FormulaParseException("Found reference to named range \""
+ name + "\", but that named range wasn't defined!");
throw new FormulaParseException("Specified named range '"
+ name + "' does not exist in the current workbook.");
}
/**
* @param name an 'identifier' like string (i.e. contains alphanums, and dots)
* @return <code>null</code> if name cannot be split at a dot
*/
private AreaReference parseArea(String name) {
@ -323,6 +330,8 @@ public final class FormulaParser {
return null;
}
}
// This expression is only valid as an area ref, if the LHS and RHS of the dot(s) are both
// cell refs. Otherwise, this expression must be a named range name
String partA = name.substring(0, dotPos);
if (!isValidCellReference(partA)) {
return null;
@ -336,12 +345,14 @@ public final class FormulaParser {
return new AreaReference(topLeft, bottomRight);
}
/**
* @return <code>true</code> if the specified name is a valid cell reference
*/
private static boolean isValidCellReference(String str) {
// TODO - exact rules for recognising cell references may be too complicated for regex
return CELL_REFERENCE_PATTERN.matcher(str).matches();
return CellReference.classifyCellReference(str) == NameType.CELL;
}
/**
* Note - Excel function names are 'case aware but not case sensitive'. This method may end
* up creating a defined name record in the workbook if the specified name is not an internal

View File

@ -14,13 +14,14 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.formula;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.hssf.util.CellReference;
/**
* Formats sheet names for use in formula expressions.
*
@ -28,14 +29,12 @@ import java.util.regex.Pattern;
*/
public final class SheetNameFormatter {
private static final String BIFF8_LAST_COLUMN = "IV";
private static final int BIFF8_LAST_COLUMN_TEXT_LEN = BIFF8_LAST_COLUMN.length();
private static final String BIFF8_LAST_ROW = String.valueOf(0x10000);
private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length();
private static final char DELIMITER = '\'';
private static final Pattern CELL_REF_PATTERN = Pattern.compile("([A-Za-z])+[0-9]+");
/**
* Matches a single cell ref with no absolute ('$') markers
*/
private static final Pattern CELL_REF_PATTERN = Pattern.compile("([A-Za-z]+)([0-9]+)");
private SheetNameFormatter() {
// no instances of this class
@ -105,27 +104,27 @@ public final class SheetNameFormatter {
return false;
}
/**
* @return <code>true</code> if the presence of the specified character in a sheet name would
* require the sheet name to be delimited in formulas. This includes every non-alphanumeric
* character besides underscore '_'.
*/
/* package */ static boolean isSpecialChar(char ch) {
// note - Character.isJavaIdentifierPart() would allow dollars '$'
if(Character.isLetterOrDigit(ch)) {
return false;
}
switch(ch) {
case '_': // underscore is ok
return false;
case '\n':
case '\r':
case '\t':
throw new RuntimeException("Illegal character (0x"
+ Integer.toHexString(ch) + ") found in sheet name");
}
return true;
}
/**
* @return <code>true</code> if the presence of the specified character in a sheet name would
* require the sheet name to be delimited in formulas. This includes every non-alphanumeric
* character besides underscore '_'.
*/
/* package */ static boolean isSpecialChar(char ch) {
// note - Character.isJavaIdentifierPart() would allow dollars '$'
if(Character.isLetterOrDigit(ch)) {
return false;
}
switch(ch) {
case '_': // underscore is ok
return false;
case '\n':
case '\r':
case '\t':
throw new RuntimeException("Illegal character (0x"
+ Integer.toHexString(ch) + ") found in sheet name");
}
return true;
}
/**
@ -149,64 +148,11 @@ public final class SheetNameFormatter {
* <p/>
* For better or worse this implementation attempts to replicate Excel's formula renderer.
* Excel uses range checking on the apparent 'row' and 'column' components. Note however that
* the maximum sheet size varies across versions:
* <p/>
* <blockquote><table border="0" cellpadding="1" cellspacing="0"
* summary="Notable cases.">
* <tr><th>Version&nbsp;&nbsp;</th><th>File Format&nbsp;&nbsp;</th>
* <th>Last Column&nbsp;&nbsp;</th><th>Last Row</th></tr>
* <tr><td>97-2003</td><td>BIFF8</td><td>"IV" (2^8)</td><td>65536 (2^14)</td></tr>
* <tr><td>2007</td><td>BIFF12</td><td>"XFD" (2^14)</td><td>1048576 (2^20)</td></tr>
* </table></blockquote>
* POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for
* this method:
* <blockquote><table border="0" cellpadding="1" cellspacing="0"
* summary="Notable cases.">
* <tr><th>Input&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</th>
* <th>Result&nbsp;</th></tr>
* <tr><td>"A1", 1</td><td>true</td></tr>
* <tr><td>"a111", 1</td><td>true</td></tr>
* <tr><td>"A65536", 1</td><td>true</td></tr>
* <tr><td>"A65537", 1</td><td>false</td></tr>
* <tr><td>"iv1", 2</td><td>true</td></tr>
* <tr><td>"IW1", 2</td><td>false</td></tr>
* <tr><td>"AAA1", 3</td><td>false</td></tr>
* <tr><td>"a111", 1</td><td>true</td></tr>
* <tr><td>"Sheet1", 6</td><td>false</td></tr>
* </table></blockquote>
* the maximum sheet size varies across versions.
* @see org.apache.poi.hssf.util.CellReference
*/
/* package */ static boolean cellReferenceIsWithinRange(String rawSheetName, int numberOfLetters) {
if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) {
// "Sheet1" case etc
return false; // that was easy
}
int nDigits = rawSheetName.length() - numberOfLetters;
if(nDigits > BIFF8_LAST_ROW_TEXT_LEN) {
return false;
}
if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) {
String colStr = rawSheetName.substring(0, BIFF8_LAST_COLUMN_TEXT_LEN).toUpperCase();
if(colStr.compareTo(BIFF8_LAST_COLUMN) > 0) {
return false;
}
} else {
// apparent column name has less chars than max
// no need to check range
}
if(nDigits == BIFF8_LAST_ROW_TEXT_LEN) {
String colStr = rawSheetName.substring(numberOfLetters);
// ASCII comparison is valid if digit count is same
if(colStr.compareTo(BIFF8_LAST_ROW) > 0) {
return false;
}
} else {
// apparent row has less chars than max
// no need to check range
}
return true;
/* package */ static boolean cellReferenceIsWithinRange(String lettersPrefix, String numbersSuffix) {
return CellReference.cellReferenceIsWithinRange(lettersPrefix, numbersSuffix);
}
/**
@ -239,7 +185,7 @@ public final class SheetNameFormatter {
// rawSheetName == "Sheet1" gets this far.
String lettersPrefix = matcher.group(1);
return cellReferenceIsWithinRange(rawSheetName, lettersPrefix.length());
String numbersSuffix = matcher.group(2);
return cellReferenceIsWithinRange(lettersPrefix, numbersSuffix);
}
}

View File

@ -17,6 +17,9 @@
package org.apache.poi.hssf.util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.hssf.record.formula.SheetNameFormatter;
/**
@ -25,6 +28,14 @@ import org.apache.poi.hssf.record.formula.SheetNameFormatter;
* @author Dennis Doubleday (patch to seperateRowColumns())
*/
public final class CellReference {
/**
* Used to classify identifiers found in formulas as cell references or not.
*/
public static final class NameType {
public static final int CELL = 1;
public static final int NAMED_RANGE = 2;
public static final int BAD_CELL_OR_NAMED_RANGE = -1;
}
/** The character ($) that signifies a row or column value is absolute instead of relative */
private static final char ABSOLUTE_REFERENCE_MARKER = '$';
/** The character (!) that separates sheet names from cell references */
@ -32,6 +43,20 @@ public final class CellReference {
/** The character (') used to quote sheet names when they contain special characters */
private static final char SPECIAL_NAME_DELIMITER = '\'';
/**
* Matches a run of letters followed by a run of digits. The run of letters is group 1 and the
* run of digits is group 2. Each group may optionally be prefixed with a single '$'.
*/
private static final Pattern CELL_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)\\$?([0-9]+)");
/**
* Named range names must start with a letter or underscore. Subsequent characters may include
* digits or dot. (They can even end in dot).
*/
private static final Pattern NAMED_RANGE_NAME_PATTERN = Pattern.compile("[_A-Za-z][_.A-Za-z0-9]*");
private static final String BIFF8_LAST_COLUMN = "IV";
private static final int BIFF8_LAST_COLUMN_TEXT_LEN = BIFF8_LAST_COLUMN.length();
private static final String BIFF8_LAST_ROW = String.valueOf(0x10000);
private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length();
private final int _rowIndex;
private final int _colIndex;
@ -119,8 +144,131 @@ public final class CellReference {
return retval-1;
}
/**
* Classifies an identifier as either a simple (2D) cell reference or a named range name
* @return one of the values from <tt>NameType</tt>
*/
public static int classifyCellReference(String str) {
int len = str.length();
if (len < 1) {
throw new IllegalArgumentException("Empty string not allowed");
}
char firstChar = str.charAt(0);
switch (firstChar) {
case ABSOLUTE_REFERENCE_MARKER:
case '.':
case '_':
break;
default:
if (!Character.isLetter(firstChar)) {
throw new IllegalArgumentException("Invalid first char (" + firstChar
+ ") of cell reference or named range. Letter expected");
}
}
if (!Character.isDigit(str.charAt(len-1))) {
// no digits at end of str
return validateNamedRangeName(str);
}
Matcher cellRefPatternMatcher = CELL_REF_PATTERN.matcher(str);
if (!cellRefPatternMatcher.matches()) {
return validateNamedRangeName(str);
}
String lettersGroup = cellRefPatternMatcher.group(1);
String digitsGroup = cellRefPatternMatcher.group(2);
if (cellReferenceIsWithinRange(lettersGroup, digitsGroup)) {
// valid cell reference
return NameType.CELL;
}
// If str looks like a cell reference, but is out of (row/col) range, it is a valid
// named range name
// This behaviour is a little weird. For example, "IW123" is a valid named range name
// because the column "IW" is beyond the maximum "IV". Note - this behaviour is version
// dependent. In Excel 2007, "IW123" is not a valid named range name.
if (str.indexOf(ABSOLUTE_REFERENCE_MARKER) >= 0) {
// Of course, named range names cannot have '$'
return NameType.BAD_CELL_OR_NAMED_RANGE;
}
return NameType.NAMED_RANGE;
}
private static int validateNamedRangeName(String str) {
if (!NAMED_RANGE_NAME_PATTERN.matcher(str).matches()) {
return NameType.BAD_CELL_OR_NAMED_RANGE;
}
return NameType.NAMED_RANGE;
}
/**
* Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be
* interpreted as a cell reference. Names of that form can be also used for sheets and/or
* named ranges, and in those circumstances, the question of whether the potential cell
* reference is valid (in range) becomes important.
* <p/>
* Note - that the maximum sheet size varies across Excel versions:
* <p/>
* <blockquote><table border="0" cellpadding="1" cellspacing="0"
* summary="Notable cases.">
* <tr><th>Version&nbsp;&nbsp;</th><th>File Format&nbsp;&nbsp;</th>
* <th>Last Column&nbsp;&nbsp;</th><th>Last Row</th></tr>
* <tr><td>97-2003</td><td>BIFF8</td><td>"IV" (2^8)</td><td>65536 (2^14)</td></tr>
* <tr><td>2007</td><td>BIFF12</td><td>"XFD" (2^14)</td><td>1048576 (2^20)</td></tr>
* </table></blockquote>
* POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for
* this method:
* <blockquote><table border="0" cellpadding="1" cellspacing="0"
* summary="Notable cases.">
* <tr><th>Input&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</th>
* <th>Result&nbsp;</th></tr>
* <tr><td>"A", "1"</td><td>true</td></tr>
* <tr><td>"a", "111"</td><td>true</td></tr>
* <tr><td>"A", "65536"</td><td>true</td></tr>
* <tr><td>"A", "65537"</td><td>false</td></tr>
* <tr><td>"iv", "1"</td><td>true</td></tr>
* <tr><td>"IW", "1"</td><td>false</td></tr>
* <tr><td>"AAA", "1"</td><td>false</td></tr>
* <tr><td>"a", "111"</td><td>true</td></tr>
* <tr><td>"Sheet", "1"</td><td>false</td></tr>
* </table></blockquote>
*
* @param colStr a string of only letter characters
* @param rowStr a string of only digit characters
* @return <code>true</code> if the row and col parameters are within range of a BIFF8 spreadsheet.
*/
public static boolean cellReferenceIsWithinRange(String colStr, String rowStr) {
int numberOfLetters = colStr.length();
if(numberOfLetters > BIFF8_LAST_COLUMN_TEXT_LEN) {
// "Sheet1" case etc
return false; // that was easy
}
int nDigits = rowStr.length();
if(nDigits > BIFF8_LAST_ROW_TEXT_LEN) {
return false;
}
if(numberOfLetters == BIFF8_LAST_COLUMN_TEXT_LEN) {
if(colStr.toUpperCase().compareTo(BIFF8_LAST_COLUMN) > 0) {
return false;
}
} else {
// apparent column name has less chars than max
// no need to check range
}
if(nDigits == BIFF8_LAST_ROW_TEXT_LEN) {
// ASCII comparison is valid if digit count is same
if(rowStr.compareTo(BIFF8_LAST_ROW) > 0) {
return false;
}
} else {
// apparent row has less chars than max
// no need to check range
}
return true;
}
/**
* Separates the row from the columns and returns an array of three Strings. The first element
* is the sheet name. Only the first element may be null. The second element in is the column
* name still in ALPHA-26 number format. The third element is the row.

View File

@ -46,6 +46,7 @@ import org.apache.poi.hssf.record.formula.SubtractPtg;
import org.apache.poi.hssf.record.formula.UnaryMinusPtg;
import org.apache.poi.hssf.record.formula.UnaryPlusPtg;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFName;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
@ -791,4 +792,37 @@ public final class TestFormulaParser extends TestCase {
assertEquals("ERROR.TYPE", funcPtg.getName());
}
public void testNamedRangeThatLooksLikeCell() {
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet("Sheet1");
HSSFName name = wb.createName();
name.setReference("Sheet1!B1");
name.setNameName("pfy1");
Ptg[] ptgs;
try {
ptgs = FormulaParser.parse("count(pfy1)", wb);
} catch (IllegalArgumentException e) {
if (e.getMessage().equals("Specified colIx (1012) is out of range")) {
throw new AssertionFailedError("Identified bug 45354");
}
throw e;
}
assertEquals(2, ptgs.length);
assertEquals(NamePtg.class, ptgs[0].getClass());
HSSFCell cell = sheet.createRow(0).createCell((short)0);
cell.setCellFormula("count(pfy1)");
assertEquals("COUNT(pfy1)", cell.getCellFormula());
try {
cell.setCellFormula("count(pf1)");
throw new AssertionFailedError("Expected formula parse execption");
} catch (FormulaParseException e) {
if (!e.getMessage().equals("Specified named range 'pf1' does not exist in the current workbook.")) {
throw e;
}
}
cell.setCellFormula("count(fp1)"); // plain cell ref, col is in range
}
}

View File

@ -14,7 +14,6 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.formula;
@ -76,7 +75,9 @@ public final class TestSheetNameFormatter extends TestCase {
}
private static void confirmCellRange(String text, int numberOfPrefixLetters, boolean expected) {
assertEquals(expected, SheetNameFormatter.cellReferenceIsWithinRange(text, numberOfPrefixLetters));
String prefix = text.substring(0, numberOfPrefixLetters);
String suffix = text.substring(numberOfPrefixLetters);
assertEquals(expected, SheetNameFormatter.cellReferenceIsWithinRange(prefix, suffix));
}
/**
@ -93,7 +94,7 @@ public final class TestSheetNameFormatter extends TestCase {
confirmCellRange("AAA1", 3, false);
confirmCellRange("a111", 1, true);
confirmCellRange("Sheet1", 6, false);
confirmCellRange("iV65536", 2, true); // max cell in Excel 97-2003
confirmCellRange("IW65537", 2, false);
confirmCellRange("iV65536", 2, true); // max cell in Excel 97-2003
confirmCellRange("IW65537", 2, false);
}
}

View File

@ -20,6 +20,8 @@ package org.apache.poi.hssf.util;
import junit.framework.TestCase;
import org.apache.poi.hssf.util.CellReference.NameType;
public final class TestCellReference extends TestCase {
@ -75,7 +77,6 @@ public final class TestCellReference extends TestCase {
confirmCell(cf, "Amazing!", 0, 0, false, false, "'Amazing!'!A1");
}
/* package */ static void confirmCell(CellReference cf, String expSheetName, int expRow,
int expCol, boolean expIsRowAbs, boolean expIsColAbs, String expText) {
@ -87,8 +88,22 @@ public final class TestCellReference extends TestCase {
assertEquals("text is wrong", expText, cf.formatAsString());
}
public static void main(String [] args) {
System.out.println("Testing org.apache.poi.hssf.util.TestCellReference");
junit.textui.TestRunner.run(TestCellReference.class);
public void testClassifyCellReference() {
confirmNameType("a1", NameType.CELL);
confirmNameType("pfy1", NameType.NAMED_RANGE);
confirmNameType("pf1", NameType.NAMED_RANGE); // (col) out of cell range
confirmNameType("fp1", NameType.CELL);
confirmNameType("pf$1", NameType.BAD_CELL_OR_NAMED_RANGE);
confirmNameType("_A1", NameType.NAMED_RANGE);
confirmNameType("A_1", NameType.NAMED_RANGE);
confirmNameType("A1_", NameType.NAMED_RANGE);
confirmNameType(".A1", NameType.BAD_CELL_OR_NAMED_RANGE);
confirmNameType("A.1", NameType.NAMED_RANGE);
confirmNameType("A1.", NameType.NAMED_RANGE);
}
private void confirmNameType(String ref, int expectedResult) {
int actualResult = CellReference.classifyCellReference(ref);
assertEquals(expectedResult, actualResult);
}
}