Bugzilla 47312 - Fixed formula parser to properly reject cell references with a '0' row component

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@781616 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2009-06-03 23:23:13 +00:00
parent 522d5c6892
commit fd7ce95af9
4 changed files with 130 additions and 61 deletions

View File

@ -35,6 +35,7 @@
<release version="3.5-beta7" date="2009-??-??">
</release>
<release version="3.5-beta6" date="2009-06-11">
<action dev="POI-DEVELOPERS" type="fix">47312 - Fixed formula parser to properly reject cell references with a '0' row component</action>
<action dev="POI-DEVELOPERS" type="fix">47199 - Fixed PageSettingsBlock/Sheet to tolerate margin records after other non-PSB records</action>
<action dev="POI-DEVELOPERS" type="fix">47069 - Fixed HSSFSheet#getFirstRowNum and HSSFSheet#getLastRowNum to return correct values after removal of all rows</action>
<action dev="POI-DEVELOPERS" type="fix">47278 - Fixed XSSFCell to avoid generating xsi:nil entries in shared string table</action>

View File

@ -21,7 +21,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.hssf.record.formula.SheetNameFormatter;
import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry;
import org.apache.poi.ss.SpreadsheetVersion;
/**
@ -40,21 +39,21 @@ public class CellReference {
public static final int BAD_CELL_OR_NAMED_RANGE = -1;
}
/** The character ($) that signifies a row or column value is absolute instead of relative */
/** The character ($) that signifies a row or column value is absolute instead of relative */
private static final char ABSOLUTE_REFERENCE_MARKER = '$';
/** The character (!) that separates sheet names from cell references */
/** The character (!) that separates sheet names from cell references */
private static final char SHEET_NAME_DELIMITER = '!';
/** The character (') used to quote sheet names when they contain special characters */
private static final char SPECIAL_NAME_DELIMITER = '\'';
/**
* Matches a run of one or more letters followed by a run of one or more digits.
* The run of letters is group 1 and the run of digits is group 2.
* The run of letters is group 1 and the run of digits is group 2.
* Each group may optionally be prefixed with a single '$'.
*/
private static final Pattern CELL_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)\\$?([0-9]+)");
/**
* Matches a run of one or more letters. The run of letters is group 1.
* Matches a run of one or more letters. The run of letters is group 1.
* The text may optionally be prefixed with a single '$'.
*/
private static final Pattern COLUMN_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)");
@ -68,11 +67,11 @@ public class CellReference {
//private static final String BIFF8_LAST_ROW = String.valueOf(SpreadsheetVersion.EXCEL97.getMaxRows());
//private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length();
private final int _rowIndex;
private final int _colIndex;
private final String _sheetName;
private final boolean _isRowAbs;
private final boolean _isColAbs;
private final int _rowIndex;
private final int _colIndex;
private final String _sheetName;
private final boolean _isRowAbs;
private final boolean _isColAbs;
/**
* Create an cell ref from a string representation. Sheet names containing special characters should be
@ -81,7 +80,7 @@ public class CellReference {
public CellReference(String cellRef) {
String[] parts = separateRefParts(cellRef);
_sheetName = parts[0];
String colRef = parts[1];
String colRef = parts[1];
if (colRef.length() < 1) {
throw new IllegalArgumentException("Invalid Formula cell reference: '"+cellRef+"'");
}
@ -90,7 +89,7 @@ public class CellReference {
colRef=colRef.substring(1);
}
_colIndex = convertColStringToIndex(colRef);
String rowRef=parts[2];
if (rowRef.length() < 1) {
throw new IllegalArgumentException("Invalid Formula cell reference: '"+cellRef+"'");
@ -139,7 +138,7 @@ public class CellReference {
public String getSheetName(){
return _sheetName;
}
public static boolean isPartAbsolute(String part) {
return part.charAt(0) == ABSOLUTE_REFERENCE_MARKER;
}
@ -153,7 +152,7 @@ public class CellReference {
* @return zero based column index
*/
public static int convertColStringToIndex(String ref) {
int pos = 0;
int retval=0;
for (int k = ref.length()-1; k >= 0; k--) {
@ -175,7 +174,7 @@ public class CellReference {
/**
* Classifies an identifier as either a simple (2D) cell reference or a named range name
* @return one of the values from <tt>NameType</tt>
* @return one of the values from <tt>NameType</tt>
*/
public static int classifyCellReference(String str, SpreadsheetVersion ssVersion) {
int len = str.length();
@ -190,7 +189,7 @@ public class CellReference {
break;
default:
if (!Character.isLetter(firstChar)) {
throw new IllegalArgumentException("Invalid first char (" + firstChar
throw new IllegalArgumentException("Invalid first char (" + firstChar
+ ") of cell reference or named range. Letter expected");
}
}
@ -204,7 +203,7 @@ public class CellReference {
}
String lettersGroup = cellRefPatternMatcher.group(1);
String digitsGroup = cellRefPatternMatcher.group(2);
if (cellReferenceIsWithinRange(lettersGroup, digitsGroup, ssVersion)) {
if (cellReferenceIsWithinRange(lettersGroup, digitsGroup, ssVersion)) {
// valid cell reference
return NameType.CELL;
}
@ -233,17 +232,17 @@ public class CellReference {
}
return NameType.NAMED_RANGE;
}
/**
* Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be
* Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be
* interpreted as a cell reference. Names of that form can be also used for sheets and/or
* named ranges, and in those circumstances, the question of whether the potential cell
* named ranges, and in those circumstances, the question of whether the potential cell
* reference is valid (in range) becomes important.
* <p/>
* Note - that the maximum sheet size varies across Excel versions:
* <p/>
* <blockquote><table border="0" cellpadding="1" cellspacing="0"
* <blockquote><table border="0" cellpadding="1" cellspacing="0"
* summary="Notable cases.">
* <tr><th>Version&nbsp;&nbsp;</th><th>File Format&nbsp;&nbsp;</th>
* <th>Last Column&nbsp;&nbsp;</th><th>Last Row</th></tr>
@ -252,7 +251,7 @@ public class CellReference {
* </table></blockquote>
* POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for
* this method:
* <blockquote><table border="0" cellpadding="1" cellspacing="0"
* <blockquote><table border="0" cellpadding="1" cellspacing="0"
* summary="Notable cases.">
* <tr><th>Input&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</th>
* <th>Result&nbsp;</th></tr>
@ -266,7 +265,7 @@ public class CellReference {
* <tr><td>"a", "111"</td><td>true</td></tr>
* <tr><td>"Sheet", "1"</td><td>false</td></tr>
* </table></blockquote>
*
*
* @param colStr a string of only letter characters
* @param rowStr a string of only digit characters
* @return <code>true</code> if the row and col parameters are within range of a BIFF8 spreadsheet.
@ -275,30 +274,23 @@ public class CellReference {
if (!isColumnWithnRange(colStr, ssVersion)) {
return false;
}
String lastRow = String.valueOf(ssVersion.getMaxRows());
int lastRowLen = lastRow.length();
int nDigits = rowStr.length();
if(nDigits > lastRowLen) {
return false;
int rowNum = Integer.parseInt(rowStr);
if (rowNum < 0) {
throw new IllegalStateException("Invalid rowStr '" + rowStr + "'.");
}
if(nDigits == lastRowLen) {
// ASCII comparison is valid if digit count is same
if(rowStr.compareTo(lastRow) > 0) {
return false;
}
} else {
// apparent row has less chars than max
// no need to check range
if (rowNum == 0) {
// execution gets here because caller does first pass of discriminating
// potential cell references using a simplistic regex pattern.
return false;
}
return true;
return rowNum <= ssVersion.getMaxRows();
}
public static boolean isColumnWithnRange(String colStr, SpreadsheetVersion ssVersion) {
String lastCol = ssVersion.getLastColumnName();
int lastColLength = lastCol.length();
String lastCol = ssVersion.getLastColumnName();
int lastColLength = lastCol.length();
int numberOfLetters = colStr.length();
if(numberOfLetters > lastColLength) {
@ -318,11 +310,11 @@ public class CellReference {
/**
* Separates the row from the columns and returns an array of three Strings. The first element
* is the sheet name. Only the first element may be null. The second element in is the column
* is the sheet name. Only the first element may be null. The second element in is the column
* name still in ALPHA-26 number format. The third element is the row.
*/
private static String[] separateRefParts(String reference) {
int plingPos = reference.lastIndexOf(SHEET_NAME_DELIMITER);
String sheetName = parseSheetName(reference, plingPos);
int start = plingPos+1;
@ -331,7 +323,7 @@ public class CellReference {
int loc = start;
// skip initial dollars
// skip initial dollars
if (reference.charAt(loc)==ABSOLUTE_REFERENCE_MARKER) {
loc++;
}
@ -343,9 +335,9 @@ public class CellReference {
}
}
return new String[] {
sheetName,
reference.substring(start,loc),
reference.substring(loc),
sheetName,
reference.substring(start,loc),
reference.substring(loc),
};
}
@ -353,7 +345,7 @@ public class CellReference {
if(indexOfSheetNameDelimiter < 0) {
return null;
}
boolean isQuoted = reference.charAt(0) == SPECIAL_NAME_DELIMITER;
if(!isQuoted) {
return reference.substring(0, indexOfSheetNameDelimiter);
@ -364,14 +356,14 @@ public class CellReference {
}
// TODO - refactor cell reference parsing logic to one place.
// Current known incarnations:
// Current known incarnations:
// FormulaParser.GetName()
// CellReference.parseSheetName() (here)
// AreaReference.separateAreaRefs()
// AreaReference.separateAreaRefs()
// SheetNameFormatter.format() (inverse)
StringBuffer sb = new StringBuffer(indexOfSheetNameDelimiter);
for(int i=1; i<lastQuotePos; i++) { // Note boundaries - skip outer quotes
char ch = reference.charAt(i);
if(ch != SPECIAL_NAME_DELIMITER) {
@ -400,20 +392,20 @@ public class CellReference {
// Excel counts column A as the 1st column, we
// treat it as the 0th one
int excelColNum = col + 1;
String colRef = "";
int colRemain = excelColNum;
while(colRemain > 0) {
int thisPart = colRemain % 26;
if(thisPart == 0) { thisPart = 26; }
colRemain = (colRemain - thisPart) / 26;
// The letter A is at 65
char colChar = (char)(thisPart+64);
colRef = colChar + colRef;
}
return colRef;
}
@ -436,7 +428,7 @@ public class CellReference {
appendCellReference(sb);
return sb.toString();
}
public String toString() {
StringBuffer sb = new StringBuffer(64);
sb.append(getClass().getName()).append(" [");
@ -451,7 +443,7 @@ public class CellReference {
* row number, and the A based column letter.
* This will not include any markers for absolute
* references, so use {@link #formatAsString()}
* to properly turn references into strings.
* to properly turn references into strings.
*/
public String[] getCellRefParts() {
return new String[] {

View File

@ -68,6 +68,7 @@ import org.apache.poi.hssf.usermodel.TestHSSFName;
import org.apache.poi.ss.formula.FormulaParser;
import org.apache.poi.ss.formula.FormulaParserTestHelper;
import org.apache.poi.ss.usermodel.BaseTestBugzillaIssues;
import org.apache.poi.ss.usermodel.Name;
/**
* Test the low level formula parser functionality. High level tests are to
@ -1229,4 +1230,46 @@ public final class TestFormulaParser extends TestCase {
assertNotNull("Ptg array should not be null", result);
confirmTokenClasses(result, new Class[] { IntPtg.class, NamePtg.class, AddPtg.class,});
}
/**
* Zero is not a valid row number so cell references like 'A0' are not valid.
* Actually, they should be treated like defined names.
* <br/>
* In addition, leading zeros (on the row component) should be removed from cell
* references during parsing.
*/
public void testZeroRowRefs() {
String badCellRef = "B0"; // bad because zero is not a valid row number
String leadingZeroCellRef = "B000001"; // this should get parsed as "B1"
HSSFWorkbook wb = new HSSFWorkbook();
try {
HSSFFormulaParser.parse(badCellRef, wb);
throw new AssertionFailedError("Identified bug 47312b - Shouldn't be able to parse cell ref '"
+ badCellRef + "'.");
} catch (RuntimeException e) {
// expected during successful test
FormulaParserTestHelper.confirmParseException(e, "Specified named range '"
+ badCellRef + "' does not exist in the current workbook.");
}
Ptg[] ptgs;
try {
ptgs = HSSFFormulaParser.parse(leadingZeroCellRef, wb);
assertEquals("B1", ((RefPtg) ptgs[0]).toFormulaString());
} catch (RuntimeException e) {
FormulaParserTestHelper.confirmParseException(e, "Specified named range '"
+ leadingZeroCellRef + "' does not exist in the current workbook.");
// close but no cigar
throw new AssertionFailedError("Identified bug 47312c - '"
+ leadingZeroCellRef + "' should parse as 'B1'.");
}
// create a defined name called 'B0' and try again
Name n = wb.createName();
n.setNameName("B0");
n.setRefersToFormula("1+1");
ptgs = HSSFFormulaParser.parse("B0", wb);
assertEquals(NamePtg.class, ptgs[0].getClass());
}
}

View File

@ -17,15 +17,17 @@
package org.apache.poi.ss.util;
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.util.CellReference;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
/**
* Tests that the common CellReference works as we need it to
*/
public class TestCellReference extends TestCase {
public final class TestCellReference extends TestCase {
public void testGetCellRefParts() {
CellReference cellReference;
@ -168,4 +170,35 @@ public class TestCellReference extends TestCase {
String collRef4 = new CellReference(0, col4).formatAsString();
assertEquals("CBA1", collRef4);
}
public void testBadRowNumber() {
SpreadsheetVersion v97 = SpreadsheetVersion.EXCEL97;
SpreadsheetVersion v2007 = SpreadsheetVersion.EXCEL2007;
confirmCrInRange(true, "A", "1", v97);
confirmCrInRange(true, "IV", "65536", v97);
confirmCrInRange(false, "IV", "65537", v97);
confirmCrInRange(false, "IW", "65536", v97);
confirmCrInRange(true, "A", "1", v2007);
confirmCrInRange(true, "XFD", "1048576", v2007);
confirmCrInRange(false, "XFD", "1048577", v2007);
confirmCrInRange(false, "XFE", "1048576", v2007);
if (CellReference.cellReferenceIsWithinRange("B", "0", v97)) {
throw new AssertionFailedError("Identified bug 47312a");
}
confirmCrInRange(false, "A", "0", v97);
confirmCrInRange(false, "A", "0", v2007);
}
private static void confirmCrInRange(boolean expResult, String colStr, String rowStr,
SpreadsheetVersion sv) {
if (expResult == CellReference.cellReferenceIsWithinRange(colStr, rowStr, sv)) {
return;
}
throw new AssertionFailedError("expected (c='" + colStr + "', r='" + rowStr + "' to be "
+ (expResult ? "within" : "out of") + " bounds for version " + sv.name());
}
}