bug 60260: parse unicode sheet names
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1778418 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
13499b6444
commit
726fe10074
@ -96,7 +96,7 @@ public final class FormulaParser {
|
|||||||
private final static POILogger log = POILogFactory.getLogger(FormulaParser.class);
|
private final static POILogger log = POILogFactory.getLogger(FormulaParser.class);
|
||||||
private final String _formulaString;
|
private final String _formulaString;
|
||||||
private final int _formulaLength;
|
private final int _formulaLength;
|
||||||
/** points at the next character to be read (after the {@link #look} char) */
|
/** points at the next character to be read (after the {@link #look} codepoint) */
|
||||||
private int _pointer;
|
private int _pointer;
|
||||||
|
|
||||||
private ParseNode _rootNode;
|
private ParseNode _rootNode;
|
||||||
@ -106,10 +106,10 @@ public final class FormulaParser {
|
|||||||
private final static char LF = '\n'; // Normally just XSSF
|
private final static char LF = '\n'; // Normally just XSSF
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lookahead Character.
|
* Lookahead unicode codepoint.
|
||||||
* gets value '\0' when the input string is exhausted
|
* gets value '\0' when the input string is exhausted
|
||||||
*/
|
*/
|
||||||
private char look;
|
private int look;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tracks whether the run of whitespace preceding "look" could be an
|
* Tracks whether the run of whitespace preceding "look" could be an
|
||||||
@ -226,20 +226,20 @@ public final class FormulaParser {
|
|||||||
throw new RuntimeException("too far");
|
throw new RuntimeException("too far");
|
||||||
}
|
}
|
||||||
if (_pointer < _formulaLength) {
|
if (_pointer < _formulaLength) {
|
||||||
look=_formulaString.charAt(_pointer);
|
look=_formulaString.codePointAt(_pointer);
|
||||||
} else {
|
} else {
|
||||||
// Just return if so and reset 'look' to something to keep
|
// Just return if so and reset 'look' to something to keep
|
||||||
// SkipWhitespace from spinning
|
// SkipWhitespace from spinning
|
||||||
look = (char)0;
|
look = (char)0;
|
||||||
_inIntersection = false;
|
_inIntersection = false;
|
||||||
}
|
}
|
||||||
_pointer++;
|
_pointer += Character.charCount(look);
|
||||||
//System.out.println("Got char: "+ look);
|
//System.out.println(new StringBuilder("Got char: ").appendCodePoint(look).toString());
|
||||||
}
|
}
|
||||||
private void resetPointer(int ptr) {
|
private void resetPointer(int ptr) {
|
||||||
_pointer = ptr;
|
_pointer = ptr;
|
||||||
if (_pointer <= _formulaLength) {
|
if (_pointer <= _formulaLength) {
|
||||||
look=_formulaString.charAt(_pointer-1);
|
look=_formulaString.codePointAt(_pointer - Character.charCount(look));
|
||||||
} else {
|
} else {
|
||||||
// Just return if so and reset 'look' to something to keep
|
// Just return if so and reset 'look' to something to keep
|
||||||
// SkipWhitespace from spinning
|
// SkipWhitespace from spinning
|
||||||
@ -255,25 +255,32 @@ public final class FormulaParser {
|
|||||||
msg = "The specified formula '" + _formulaString
|
msg = "The specified formula '" + _formulaString
|
||||||
+ "' starts with an equals sign which is not allowed.";
|
+ "' starts with an equals sign which is not allowed.";
|
||||||
} else {
|
} else {
|
||||||
msg = "Parse error near char " + (_pointer-1) + " '" + look + "'"
|
msg = new StringBuilder("Parse error near char ")
|
||||||
+ " in specified formula '" + _formulaString + "'. Expected "
|
.append(_pointer-1) //this is the char (UTF-16 code unit) index, not the codepoint index; it may be larger than the codepoint index if the formula contains surrogate pairs
|
||||||
+ s;
|
.append(" '")
|
||||||
|
.appendCodePoint(look)
|
||||||
|
.append("'")
|
||||||
|
.append(" in specified formula '")
|
||||||
|
.append(_formulaString)
|
||||||
|
.append("'. Expected ")
|
||||||
|
.append(s)
|
||||||
|
.toString();
|
||||||
}
|
}
|
||||||
return new FormulaParseException(msg);
|
return new FormulaParseException(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Recognize an Alpha Character */
|
/** Recognize an Alpha Character */
|
||||||
private static boolean IsAlpha(char c) {
|
private static boolean IsAlpha(int c) {
|
||||||
return Character.isLetter(c) || c == '$' || c=='_';
|
return Character.isLetter(c) || c == '$' || c=='_';
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Recognize a Decimal Digit */
|
/** Recognize a Decimal Digit */
|
||||||
private static boolean IsDigit(char c) {
|
private static boolean IsDigit(int c) {
|
||||||
return Character.isDigit(c);
|
return Character.isDigit(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Recognize White Space */
|
/** Recognize White Space */
|
||||||
private static boolean IsWhite( char c) {
|
private static boolean IsWhite(int c) {
|
||||||
return c ==' ' || c== TAB || c == CR || c == LF;
|
return c ==' ' || c== TAB || c == CR || c == LF;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -289,9 +296,13 @@ public final class FormulaParser {
|
|||||||
* unchecked exception. This method does <b>not</b> consume whitespace (before or after the
|
* unchecked exception. This method does <b>not</b> consume whitespace (before or after the
|
||||||
* matched character).
|
* matched character).
|
||||||
*/
|
*/
|
||||||
private void Match(char x) {
|
private void Match(int x) {
|
||||||
if (look != x) {
|
if (look != x) {
|
||||||
throw expected("'" + x + "'");
|
throw expected(new StringBuilder()
|
||||||
|
.append("'")
|
||||||
|
.appendCodePoint(x)
|
||||||
|
.append("'")
|
||||||
|
.toString());
|
||||||
}
|
}
|
||||||
GetChar();
|
GetChar();
|
||||||
}
|
}
|
||||||
@ -301,7 +312,7 @@ public final class FormulaParser {
|
|||||||
StringBuilder value = new StringBuilder();
|
StringBuilder value = new StringBuilder();
|
||||||
|
|
||||||
while (IsDigit(this.look)){
|
while (IsDigit(this.look)){
|
||||||
value.append(this.look);
|
value.appendCodePoint(this.look);
|
||||||
GetChar();
|
GetChar();
|
||||||
}
|
}
|
||||||
return value.length() == 0 ? null : value.toString();
|
return value.length() == 0 ? null : value.toString();
|
||||||
@ -826,7 +837,7 @@ public final class FormulaParser {
|
|||||||
}
|
}
|
||||||
StringBuilder name = new StringBuilder();
|
StringBuilder name = new StringBuilder();
|
||||||
while (look!=']') {
|
while (look!=']') {
|
||||||
name.append(look);
|
name.appendCodePoint(look);
|
||||||
GetChar();
|
GetChar();
|
||||||
}
|
}
|
||||||
Match(']');
|
Match(']');
|
||||||
@ -914,7 +925,7 @@ public final class FormulaParser {
|
|||||||
throw expected("number, string, defined name, or data table");
|
throw expected("number, string, defined name, or data table");
|
||||||
}
|
}
|
||||||
while (isValidDefinedNameChar(look)) {
|
while (isValidDefinedNameChar(look)) {
|
||||||
sb.append(look);
|
sb.appendCodePoint(look);
|
||||||
GetChar();
|
GetChar();
|
||||||
}
|
}
|
||||||
SkipWhite();
|
SkipWhite();
|
||||||
@ -923,13 +934,18 @@ public final class FormulaParser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* @param ch unicode codepoint
|
||||||
* @return <code>true</code> if the specified character may be used in a defined name
|
* @return <code>true</code> if the specified character may be used in a defined name
|
||||||
*/
|
*/
|
||||||
private static boolean isValidDefinedNameChar(char ch) {
|
private static boolean isValidDefinedNameChar(int ch) {
|
||||||
if (Character.isLetterOrDigit(ch)) {
|
if (Character.isLetterOrDigit(ch)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// the defined name rules are vague on whether unicode characters are allowed
|
||||||
|
// assume they're allowed.
|
||||||
|
if (ch > 128) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case '.':
|
case '.':
|
||||||
case '_':
|
case '_':
|
||||||
@ -937,6 +953,7 @@ public final class FormulaParser {
|
|||||||
case '\\': // of all things
|
case '\\': // of all things
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// includes special non-name control characters like ! $ : , ( ) [ ] and space
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1120,7 +1137,7 @@ public final class FormulaParser {
|
|||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
GetChar();
|
GetChar();
|
||||||
while (look != ']') {
|
while (look != ']') {
|
||||||
sb.append(look);
|
sb.appendCodePoint(look);
|
||||||
GetChar();
|
GetChar();
|
||||||
}
|
}
|
||||||
GetChar();
|
GetChar();
|
||||||
@ -1148,7 +1165,7 @@ public final class FormulaParser {
|
|||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
boolean done = look == '\'';
|
boolean done = look == '\'';
|
||||||
while(!done) {
|
while(!done) {
|
||||||
sb.append(look);
|
sb.appendCodePoint(look);
|
||||||
GetChar();
|
GetChar();
|
||||||
if(look == '\'')
|
if(look == '\'')
|
||||||
{
|
{
|
||||||
@ -1176,7 +1193,7 @@ public final class FormulaParser {
|
|||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
// can concatenate idens with dots
|
// can concatenate idens with dots
|
||||||
while (isUnquotedSheetNameChar(look)) {
|
while (isUnquotedSheetNameChar(look)) {
|
||||||
sb.append(look);
|
sb.appendCodePoint(look);
|
||||||
GetChar();
|
GetChar();
|
||||||
}
|
}
|
||||||
NameIdentifier iden = new NameIdentifier(sb.toString(), false);
|
NameIdentifier iden = new NameIdentifier(sb.toString(), false);
|
||||||
@ -1214,11 +1231,17 @@ public final class FormulaParser {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* very similar to {@link SheetNameFormatter#isSpecialChar(char)}
|
* very similar to {@link SheetNameFormatter#isSpecialChar(char)}
|
||||||
|
* @param ch unicode codepoint
|
||||||
*/
|
*/
|
||||||
private static boolean isUnquotedSheetNameChar(char ch) {
|
private static boolean isUnquotedSheetNameChar(int ch) {
|
||||||
if(Character.isLetterOrDigit(ch)) {
|
if(Character.isLetterOrDigit(ch)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// the sheet naming rules are vague on whether unicode characters are allowed
|
||||||
|
// assume they're allowed.
|
||||||
|
if (ch > 128) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
switch(ch) {
|
switch(ch) {
|
||||||
case '.': // dot is OK
|
case '.': // dot is OK
|
||||||
case '_': // underscore is OK
|
case '_': // underscore is OK
|
||||||
@ -1413,7 +1436,11 @@ public final class FormulaParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isArgumentDelimiter(char ch) {
|
/**
|
||||||
|
* @param ch unicode codepoint
|
||||||
|
* @return <code>true</code> if the codepoint is ',' or ')'
|
||||||
|
*/
|
||||||
|
private static boolean isArgumentDelimiter(int ch) {
|
||||||
return ch == ',' || ch == ')';
|
return ch == ',' || ch == ')';
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1754,7 +1781,7 @@ public final class FormulaParser {
|
|||||||
}
|
}
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
while (Character.isLetterOrDigit(look) || look == '.') {
|
while (Character.isLetterOrDigit(look) || look == '.') {
|
||||||
sb.append(look);
|
sb.appendCodePoint(look);
|
||||||
GetChar();
|
GetChar();
|
||||||
}
|
}
|
||||||
if (sb.length() < 1) {
|
if (sb.length() < 1) {
|
||||||
@ -1819,7 +1846,7 @@ public final class FormulaParser {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
token.append(look);
|
token.appendCodePoint(look);
|
||||||
GetChar();
|
GetChar();
|
||||||
}
|
}
|
||||||
return token.toString();
|
return token.toString();
|
||||||
|
@ -23,6 +23,8 @@ import static org.junit.Assert.assertNotNull;
|
|||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.poi.hssf.usermodel.HSSFEvaluationWorkbook;
|
import org.apache.poi.hssf.usermodel.HSSFEvaluationWorkbook;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||||
import org.apache.poi.ss.formula.ptg.AbstractFunctionPtg;
|
import org.apache.poi.ss.formula.ptg.AbstractFunctionPtg;
|
||||||
@ -30,6 +32,7 @@ import org.apache.poi.ss.formula.ptg.NameXPxg;
|
|||||||
import org.apache.poi.ss.formula.ptg.Ptg;
|
import org.apache.poi.ss.formula.ptg.Ptg;
|
||||||
import org.apache.poi.ss.formula.ptg.Ref3DPxg;
|
import org.apache.poi.ss.formula.ptg.Ref3DPxg;
|
||||||
import org.apache.poi.ss.formula.ptg.StringPtg;
|
import org.apache.poi.ss.formula.ptg.StringPtg;
|
||||||
|
import org.apache.poi.util.IOUtils;
|
||||||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFEvaluationWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFEvaluationWorkbook;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
@ -55,16 +58,31 @@ public class TestFormulaParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void checkHSSFFormula(String formula) {
|
||||||
|
HSSFWorkbook wb = new HSSFWorkbook();
|
||||||
|
FormulaParsingWorkbook workbook = HSSFEvaluationWorkbook.create(wb);
|
||||||
|
FormulaParser.parse(formula, workbook, FormulaType.CELL, 0);
|
||||||
|
IOUtils.closeQuietly(wb);
|
||||||
|
}
|
||||||
|
private static void checkXSSFFormula(String formula) {
|
||||||
|
XSSFWorkbook wb = new XSSFWorkbook();
|
||||||
|
FormulaParsingWorkbook workbook = XSSFEvaluationWorkbook.create(wb);
|
||||||
|
FormulaParser.parse(formula, workbook, FormulaType.CELL, 0);
|
||||||
|
IOUtils.closeQuietly(wb);
|
||||||
|
}
|
||||||
|
private static void checkFormula(String formula) {
|
||||||
|
checkHSSFFormula(formula);
|
||||||
|
checkXSSFFormula(formula);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHSSFPassCase() {
|
public void testHSSFPassCase() {
|
||||||
FormulaParsingWorkbook workbook = HSSFEvaluationWorkbook.create(new HSSFWorkbook());
|
checkHSSFFormula("Sheet1!1:65536");
|
||||||
FormulaParser.parse("Sheet1!1:65536", workbook, FormulaType.CELL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testXSSFWorksForOver65536() {
|
public void testXSSFWorksForOver65536() {
|
||||||
FormulaParsingWorkbook workbook = XSSFEvaluationWorkbook.create(new XSSFWorkbook());
|
checkXSSFFormula("Sheet1!1:65537");
|
||||||
FormulaParser.parse("Sheet1!1:65537", workbook, FormulaType.CELL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -203,4 +221,10 @@ public class TestFormulaParser {
|
|||||||
assertEquals("Column", 0, pxg.getColumn());
|
assertEquals("Column", 0, pxg.getColumn());
|
||||||
wb.close();
|
wb.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bug 60260
|
||||||
|
@Test
|
||||||
|
public void testUnicodeSheetName() {
|
||||||
|
checkFormula("'Sheet\u30FB1'!A1:A6");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,7 @@ import org.apache.poi.ss.usermodel.Sheet;
|
|||||||
import org.apache.poi.ss.usermodel.Workbook;
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
import org.apache.poi.ss.util.CellAddress;
|
import org.apache.poi.ss.util.CellAddress;
|
||||||
import org.apache.poi.ss.util.CellUtil;
|
import org.apache.poi.ss.util.CellUtil;
|
||||||
|
import org.apache.poi.util.IOUtils;
|
||||||
import org.apache.poi.xssf.XSSFITestDataProvider;
|
import org.apache.poi.xssf.XSSFITestDataProvider;
|
||||||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||||
import org.apache.xmlbeans.impl.values.XmlValueDisconnectedException;
|
import org.apache.xmlbeans.impl.values.XmlValueDisconnectedException;
|
||||||
@ -449,4 +450,14 @@ public final class TestXSSFSheetShiftRows extends BaseTestSheetShiftRows {
|
|||||||
|
|
||||||
wb.close();
|
wb.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bug 60260: shift rows or rename a sheet containing a named range
|
||||||
|
// whose formula refers to a unicode (non-ASCII) sheet name
|
||||||
|
@Test
|
||||||
|
public void shiftRowsWithUnicodeNamedRange() throws IOException {
|
||||||
|
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("unicodeSheetName.xlsx");
|
||||||
|
XSSFSheet sheet = wb.getSheetAt(0);
|
||||||
|
sheet.shiftRows(1, 2, 3);
|
||||||
|
IOUtils.closeQuietly(wb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,7 @@ import java.util.List;
|
|||||||
import org.apache.poi.ss.ITestDataProvider;
|
import org.apache.poi.ss.ITestDataProvider;
|
||||||
import org.apache.poi.ss.util.AreaReference;
|
import org.apache.poi.ss.util.AreaReference;
|
||||||
import org.apache.poi.ss.util.CellReference;
|
import org.apache.poi.ss.util.CellReference;
|
||||||
|
import org.apache.poi.util.IOUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -737,4 +738,18 @@ public abstract class BaseTestNamedRange {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bug 60260: renaming a sheet with a named range referring to a unicode (non-ASCII) sheet name
|
||||||
|
@Test
|
||||||
|
public void renameSheetWithNamedRangeReferringToUnicodeSheetName() {
|
||||||
|
Workbook wb = _testDataProvider.createWorkbook();
|
||||||
|
wb.createSheet("Sheet\u30FB1");
|
||||||
|
|
||||||
|
Name name = wb.createName();
|
||||||
|
name.setNameName("test_named_range");
|
||||||
|
name.setRefersToFormula("'Sheet\u30FB201'!A1:A6");
|
||||||
|
|
||||||
|
wb.setSheetName(0, "Sheet 1");
|
||||||
|
IOUtils.closeQuietly(wb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
BIN
test-data/spreadsheet/unicodeSheetName.xlsx
Normal file
BIN
test-data/spreadsheet/unicodeSheetName.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user