Bugzilla 52784 - replace ISO control characters with question marks in SXSSF to be consistent with XSSF

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1294657 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2012-02-28 13:52:09 +00:00
parent 49dd4c3e69
commit cfa6bdc291
3 changed files with 42 additions and 10 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta6" date="2012-??-??">
<action dev="poi-developers" type="fix">52784 - replace ISO control characters with question marks in SXSSF to be consistent with XSSF </action>
<action dev="poi-developers" type="add">52057 - updated formula test framework to be aware of recently added Functions </action>
<action dev="poi-developers" type="add">52574 - support setting header / footer page margins in HSSF </action>
<action dev="poi-developers" type="add">52583 - fixed WorkbookUtil#createSafeSheetName to escape colon </action>

View File

@ -250,6 +250,7 @@ public class SheetDataWriter {
break;
// Special characters
case '\n':
case '\r':
if (counter > last) {
_out.write(chars, last, counter - last);
}
@ -263,13 +264,6 @@ public class SheetDataWriter {
_out.write("&#x9;");
last = counter + 1;
break;
case '\r':
if (counter > last) {
_out.write(chars, last, counter - last);
}
_out.write("&#xd;");
last = counter + 1;
break;
case 0xa0:
if (counter > last) {
_out.write(chars, last, counter - last);
@ -278,7 +272,14 @@ public class SheetDataWriter {
last = counter + 1;
break;
default:
if (c < ' ' || c > 127) {
// YK: XmlBeans silently replaces ISO control characters (code points below 32) with question marks.
// The same rule applies to Unicode surrogates and to the "not a character" code points U+FFFE and U+FFFF.
if( c < ' ' || Character.isLowSurrogate(c) || Character.isHighSurrogate(c) ||
('\uFFFE' <= c && c <= '\uFFFF')) {
_out.write('?');
last = counter + 1;
}
else if (c > 127) {
if (counter > last) {
_out.write(chars, last, counter - last);
}

View File

@ -21,6 +21,8 @@ package org.apache.poi.xssf.streaming;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.SXSSFITestDataProvider;
import org.apache.poi.xssf.XSSFITestDataProvider;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
*
@ -66,12 +68,40 @@ public class TestSXSSFCell extends BaseTestCell {
Sheet sh = wb.createSheet();
Row row = sh.createRow(0);
Cell cell = row.createCell(0);
String sval = "<>\t\r\n\u00a0 &\"POI\'\u2122";
String sval = "\u0000\u0002\u0012<>\t\n\u00a0 &\"POI\'\u2122";
cell.setCellValue(sval);
wb = _testDataProvider.writeOutAndReadBack(wb);
assertEquals(sval, wb.getSheetAt(0).getRow(0).getCell(0).getStringCellValue());
// invalid characters are replaced with question marks
assertEquals("???<>\t\n\u00a0 &\"POI\'\u2122", wb.getSheetAt(0).getRow(0).getCell(0).getStringCellValue());
}
/**
 * Checks that SXSSF and XSSF produce the same cell text after a
 * write-out/read-back round trip for a string made of every UTF-16
 * code unit (U+0000 through U+FFFF inclusive).
 *
 * The expected text is not spelled out; the XSSF result is used as the
 * reference and the SXSSF result must be equal to it.
 */
public void testEncodingbeloAscii(){
    Workbook xwb = new XSSFWorkbook();
    Cell xCell = xwb.createSheet().createRow(0).createCell(0);

    Workbook swb = new SXSSFWorkbook();
    Cell sCell = swb.createSheet().createRow(0).createCell(0);

    // test all possible characters; the bound is inclusive so that U+FFFF is covered too
    // (the int counter cannot wrap the way a char counter would)
    StringBuilder sb = new StringBuilder(Character.MAX_VALUE + 1);
    for (int i = 0; i <= Character.MAX_VALUE; i++) {
        sb.append((char) i);
    }
    String str = sb.toString();

    // before serialization both implementations must hand back the value unchanged
    xCell.setCellValue(str);
    assertEquals(str, xCell.getStringCellValue());
    sCell.setCellValue(str);
    assertEquals(str, sCell.getStringCellValue());

    xwb = XSSFITestDataProvider.instance.writeOutAndReadBack(xwb);
    swb = SXSSFITestDataProvider.instance.writeOutAndReadBack(swb);

    // Look up the cells that were actually written. Calling createRow/createCell here
    // would build new blank cells, and the comparison below would then hold trivially
    // without ever looking at the serialized text.
    xCell = xwb.getSheetAt(0).getRow(0).getCell(0);
    sCell = swb.getSheetAt(0).getRow(0).getCell(0);

    assertEquals(xCell.getStringCellValue(), sCell.getStringCellValue());
}
}