diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 01d4a1f70..f84c086a8 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -35,6 +35,7 @@ + 47312 - Fixed formula parser to properly reject cell references with a '0' row component 47199 - Fixed PageSettingsBlock/Sheet to tolerate margin records after other non-PSB records 47069 - Fixed HSSFSheet#getFirstRowNum and HSSFSheet#getLastRowNum to return correct values after removal of all rows 47278 - Fixed XSSFCell to avoid generating xsi:nil entries in shared string table diff --git a/src/java/org/apache/poi/ss/util/CellReference.java b/src/java/org/apache/poi/ss/util/CellReference.java index b559c4302..d52e9a8be 100644 --- a/src/java/org/apache/poi/ss/util/CellReference.java +++ b/src/java/org/apache/poi/ss/util/CellReference.java @@ -21,7 +21,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.poi.hssf.record.formula.SheetNameFormatter; -import org.apache.poi.hssf.record.formula.function.FunctionMetadataRegistry; import org.apache.poi.ss.SpreadsheetVersion; /** @@ -40,21 +39,21 @@ public class CellReference { public static final int BAD_CELL_OR_NAMED_RANGE = -1; } - /** The character ($) that signifies a row or column value is absolute instead of relative */ + /** The character ($) that signifies a row or column value is absolute instead of relative */ private static final char ABSOLUTE_REFERENCE_MARKER = '$'; - /** The character (!) that separates sheet names from cell references */ + /** The character (!) that separates sheet names from cell references */ private static final char SHEET_NAME_DELIMITER = '!'; /** The character (') used to quote sheet names when they contain special characters */ private static final char SPECIAL_NAME_DELIMITER = '\''; - + /** * Matches a run of one or more letters followed by a run of one or more digits. - * The run of letters is group 1 and the run of digits is group 2. + * The run of letters is group 1 and the run of digits is group 2. * Each group may optionally be prefixed with a single '$'. */ private static final Pattern CELL_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)\\$?([0-9]+)"); /** - * Matches a run of one or more letters. The run of letters is group 1. + * Matches a run of one or more letters. The run of letters is group 1. * The text may optionally be prefixed with a single '$'. */ private static final Pattern COLUMN_REF_PATTERN = Pattern.compile("\\$?([A-Za-z]+)"); @@ -68,11 +67,11 @@ public class CellReference { //private static final String BIFF8_LAST_ROW = String.valueOf(SpreadsheetVersion.EXCEL97.getMaxRows()); //private static final int BIFF8_LAST_ROW_TEXT_LEN = BIFF8_LAST_ROW.length(); - private final int _rowIndex; - private final int _colIndex; - private final String _sheetName; - private final boolean _isRowAbs; - private final boolean _isColAbs; + private final int _rowIndex; + private final int _colIndex; + private final String _sheetName; + private final boolean _isRowAbs; + private final boolean _isColAbs; /** * Create an cell ref from a string representation. Sheet names containing special characters should be @@ -81,7 +80,7 @@ public class CellReference { public CellReference(String cellRef) { String[] parts = separateRefParts(cellRef); _sheetName = parts[0]; - String colRef = parts[1]; + String colRef = parts[1]; if (colRef.length() < 1) { throw new IllegalArgumentException("Invalid Formula cell reference: '"+cellRef+"'"); } @@ -90,7 +89,7 @@ public class CellReference { colRef=colRef.substring(1); } _colIndex = convertColStringToIndex(colRef); - + String rowRef=parts[2]; if (rowRef.length() < 1) { throw new IllegalArgumentException("Invalid Formula cell reference: '"+cellRef+"'"); @@ -139,7 +138,7 @@ public class CellReference { public String getSheetName(){ return _sheetName; } - + public static boolean isPartAbsolute(String part) { return part.charAt(0) == ABSOLUTE_REFERENCE_MARKER; } @@ -153,7 +152,7 @@ public class CellReference { * @return zero based column index */ public static int convertColStringToIndex(String ref) { - + int pos = 0; int retval=0; for (int k = ref.length()-1; k >= 0; k--) { @@ -175,7 +174,7 @@ public class CellReference { /** * Classifies an identifier as either a simple (2D) cell reference or a named range name - * @return one of the values from NameType + * @return one of the values from NameType */ public static int classifyCellReference(String str, SpreadsheetVersion ssVersion) { int len = str.length(); @@ -190,7 +189,7 @@ public class CellReference { break; default: if (!Character.isLetter(firstChar)) { - throw new IllegalArgumentException("Invalid first char (" + firstChar + throw new IllegalArgumentException("Invalid first char (" + firstChar + ") of cell reference or named range. Letter expected"); } } @@ -204,7 +203,7 @@ public class CellReference { } String lettersGroup = cellRefPatternMatcher.group(1); String digitsGroup = cellRefPatternMatcher.group(2); - if (cellReferenceIsWithinRange(lettersGroup, digitsGroup, ssVersion)) { + if (cellReferenceIsWithinRange(lettersGroup, digitsGroup, ssVersion)) { // valid cell reference return NameType.CELL; } @@ -233,17 +232,17 @@ public class CellReference { } return NameType.NAMED_RANGE; } - - + + /** - * Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be + * Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be * interpreted as a cell reference. Names of that form can be also used for sheets and/or - * named ranges, and in those circumstances, the question of whether the potential cell + * named ranges, and in those circumstances, the question of whether the potential cell * reference is valid (in range) becomes important. *

* Note - that the maximum sheet size varies across Excel versions: *

- *

* * @@ -252,7 +251,7 @@ public class CellReference { *
Version  File Format  Last Column  Last Row
* POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for * this method: - *
* * @@ -266,7 +265,7 @@ public class CellReference { * * *
Input           Result 
"a", "111"true
"Sheet", "1"false
- * + * * @param colStr a string of only letter characters * @param rowStr a string of only digit characters * @return true if the row and col parameters are within range of a BIFF8 spreadsheet. @@ -275,30 +274,23 @@ public class CellReference { if (!isColumnWithnRange(colStr, ssVersion)) { return false; } - String lastRow = String.valueOf(ssVersion.getMaxRows()); - int lastRowLen = lastRow.length(); - int nDigits = rowStr.length(); - if(nDigits > lastRowLen) { - return false; + int rowNum = Integer.parseInt(rowStr); + + if (rowNum < 0) { + throw new IllegalStateException("Invalid rowStr '" + rowStr + "'."); } - - if(nDigits == lastRowLen) { - // ASCII comparison is valid if digit count is same - if(rowStr.compareTo(lastRow) > 0) { - return false; - } - } else { - // apparent row has less chars than max - // no need to check range + if (rowNum == 0) { + // execution gets here because caller does first pass of discriminating + // potential cell references using a simplistic regex pattern. + return false; } - - return true; + return rowNum <= ssVersion.getMaxRows(); } public static boolean isColumnWithnRange(String colStr, SpreadsheetVersion ssVersion) { - String lastCol = ssVersion.getLastColumnName(); - int lastColLength = lastCol.length(); + String lastCol = ssVersion.getLastColumnName(); + int lastColLength = lastCol.length(); int numberOfLetters = colStr.length(); if(numberOfLetters > lastColLength) { @@ -318,11 +310,11 @@ public class CellReference { /** * Separates the row from the columns and returns an array of three Strings. The first element - * is the sheet name. Only the first element may be null. The second element in is the column + * is the sheet name. Only the first element may be null. The second element in is the column * name still in ALPHA-26 number format. The third element is the row. */ private static String[] separateRefParts(String reference) { - + int plingPos = reference.lastIndexOf(SHEET_NAME_DELIMITER); String sheetName = parseSheetName(reference, plingPos); int start = plingPos+1; @@ -331,7 +323,7 @@ public class CellReference { int loc = start; - // skip initial dollars + // skip initial dollars if (reference.charAt(loc)==ABSOLUTE_REFERENCE_MARKER) { loc++; } @@ -343,9 +335,9 @@ public class CellReference { } } return new String[] { - sheetName, - reference.substring(start,loc), - reference.substring(loc), + sheetName, + reference.substring(start,loc), + reference.substring(loc), }; } @@ -353,7 +345,7 @@ public class CellReference { if(indexOfSheetNameDelimiter < 0) { return null; } - + boolean isQuoted = reference.charAt(0) == SPECIAL_NAME_DELIMITER; if(!isQuoted) { return reference.substring(0, indexOfSheetNameDelimiter); @@ -364,14 +356,14 @@ public class CellReference { } // TODO - refactor cell reference parsing logic to one place. - // Current known incarnations: + // Current known incarnations: // FormulaParser.GetName() // CellReference.parseSheetName() (here) - // AreaReference.separateAreaRefs() + // AreaReference.separateAreaRefs() // SheetNameFormatter.format() (inverse) - + StringBuffer sb = new StringBuffer(indexOfSheetNameDelimiter); - + for(int i=1; i 0) { int thisPart = colRemain % 26; if(thisPart == 0) { thisPart = 26; } colRemain = (colRemain - thisPart) / 26; - + // The letter A is at 65 char colChar = (char)(thisPart+64); colRef = colChar + colRef; } - + return colRef; } @@ -436,7 +428,7 @@ public class CellReference { appendCellReference(sb); return sb.toString(); } - + public String toString() { StringBuffer sb = new StringBuffer(64); sb.append(getClass().getName()).append(" ["); @@ -451,7 +443,7 @@ public class CellReference { * row number, and the A based column letter. * This will not include any markers for absolute * references, so use {@link #formatAsString()} - * to properly turn references into strings. + * to properly turn references into strings. */ public String[] getCellRefParts() { return new String[] { diff --git a/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java b/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java index 0ccd54153..30c6df530 100644 --- a/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java +++ b/src/testcases/org/apache/poi/hssf/model/TestFormulaParser.java @@ -68,6 +68,7 @@ import org.apache.poi.hssf.usermodel.TestHSSFName; import org.apache.poi.ss.formula.FormulaParser; import org.apache.poi.ss.formula.FormulaParserTestHelper; import org.apache.poi.ss.usermodel.BaseTestBugzillaIssues; +import org.apache.poi.ss.usermodel.Name; /** * Test the low level formula parser functionality. High level tests are to @@ -1229,4 +1230,46 @@ public final class TestFormulaParser extends TestCase { assertNotNull("Ptg array should not be null", result); confirmTokenClasses(result, new Class[] { IntPtg.class, NamePtg.class, AddPtg.class,}); } + + /** + * Zero is not a valid row number so cell references like 'A0' are not valid. + * Actually, they should be treated like defined names. + *
+ * In addition, leading zeros (on the row component) should be removed from cell + * references during parsing. + */ + public void testZeroRowRefs() { + String badCellRef = "B0"; // bad because zero is not a valid row number + String leadingZeroCellRef = "B000001"; // this should get parsed as "B1" + HSSFWorkbook wb = new HSSFWorkbook(); + + try { + HSSFFormulaParser.parse(badCellRef, wb); + throw new AssertionFailedError("Identified bug 47312b - Shouldn't be able to parse cell ref '" + + badCellRef + "'."); + } catch (RuntimeException e) { + // expected during successful test + FormulaParserTestHelper.confirmParseException(e, "Specified named range '" + + badCellRef + "' does not exist in the current workbook."); + } + + Ptg[] ptgs; + try { + ptgs = HSSFFormulaParser.parse(leadingZeroCellRef, wb); + assertEquals("B1", ((RefPtg) ptgs[0]).toFormulaString()); + } catch (RuntimeException e) { + FormulaParserTestHelper.confirmParseException(e, "Specified named range '" + + leadingZeroCellRef + "' does not exist in the current workbook."); + // close but no cigar + throw new AssertionFailedError("Identified bug 47312c - '" + + leadingZeroCellRef + "' should parse as 'B1'."); + } + + // create a defined name called 'B0' and try again + Name n = wb.createName(); + n.setNameName("B0"); + n.setRefersToFormula("1+1"); + ptgs = HSSFFormulaParser.parse("B0", wb); + assertEquals(NamePtg.class, ptgs[0].getClass()); + } } diff --git a/src/testcases/org/apache/poi/ss/util/TestCellReference.java b/src/testcases/org/apache/poi/ss/util/TestCellReference.java index f13b22113..4c86c0f74 100644 --- a/src/testcases/org/apache/poi/ss/util/TestCellReference.java +++ b/src/testcases/org/apache/poi/ss/util/TestCellReference.java @@ -17,15 +17,17 @@ package org.apache.poi.ss.util; +import org.apache.poi.ss.SpreadsheetVersion; import org.apache.poi.ss.util.CellReference; +import junit.framework.AssertionFailedError; import junit.framework.TestCase; /** * Tests that the common CellReference works as we need it to */ -public class TestCellReference extends TestCase { +public final class TestCellReference extends TestCase { public void testGetCellRefParts() { CellReference cellReference; @@ -168,4 +170,35 @@ public class TestCellReference extends TestCase { String collRef4 = new CellReference(0, col4).formatAsString(); assertEquals("CBA1", collRef4); } + + public void testBadRowNumber() { + SpreadsheetVersion v97 = SpreadsheetVersion.EXCEL97; + SpreadsheetVersion v2007 = SpreadsheetVersion.EXCEL2007; + + confirmCrInRange(true, "A", "1", v97); + confirmCrInRange(true, "IV", "65536", v97); + confirmCrInRange(false, "IV", "65537", v97); + confirmCrInRange(false, "IW", "65536", v97); + + confirmCrInRange(true, "A", "1", v2007); + confirmCrInRange(true, "XFD", "1048576", v2007); + confirmCrInRange(false, "XFD", "1048577", v2007); + confirmCrInRange(false, "XFE", "1048576", v2007); + + if (CellReference.cellReferenceIsWithinRange("B", "0", v97)) { + throw new AssertionFailedError("Identified bug 47312a"); + } + + confirmCrInRange(false, "A", "0", v97); + confirmCrInRange(false, "A", "0", v2007); + } + + private static void confirmCrInRange(boolean expResult, String colStr, String rowStr, + SpreadsheetVersion sv) { + if (expResult == CellReference.cellReferenceIsWithinRange(colStr, rowStr, sv)) { + return; + } + throw new AssertionFailedError("expected (c='" + colStr + "', r='" + rowStr + "' to be " + + (expResult ? "within" : "out of") + " bounds for version " + sv.name()); + } }