From cfa6bdc2911f7c396e0615052e685de1a6226b46 Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Tue, 28 Feb 2012 13:52:09 +0000 Subject: [PATCH] Bugzilla 52784 - replace ISO control characters with question marks in SXSSF to be consistent with XSSF git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1294657 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../poi/xssf/streaming/SheetDataWriter.java | 17 +++++----- .../poi/xssf/streaming/TestSXSSFCell.java | 34 +++++++++++++++++-- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 3e6792d07..b1a1825ad 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 52784 - replace ISO control characters with question marks in SXSSF to be consistent with XSSF 52057 - updated formula test framework to be aware of recently added Functions 52574 - support setting header / footer page margins in HSSF 52583 - fixed WorkbookUtil#createSafeSheetName to escape colon diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java b/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java index d575e0532..c9a34a463 100644 --- a/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java +++ b/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java @@ -250,6 +250,7 @@ public class SheetDataWriter { break; // Special characters case '\n': + case '\r': if (counter > last) { _out.write(chars, last, counter - last); } @@ -263,13 +264,6 @@ public class SheetDataWriter { _out.write(" "); last = counter + 1; break; - case '\r': - if (counter > last) { - _out.write(chars, last, counter - last); - } - _out.write(" "); - last = counter + 1; - break; case 0xa0: if (counter > last) { _out.write(chars, last, counter - last); @@ -278,7 +272,14 @@ public class SheetDataWriter { last = counter + 1; break; default: - if (c < ' ' || c > 127) { + // YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks. + // the same rule applies to unicode surrogates and "not a character" symbols. + if( c < ' ' || Character.isLowSurrogate(c) || Character.isHighSurrogate(c) || + ('\uFFFE' <= c && c <= '\uFFFF')) { + _out.write('?'); + last = counter + 1; + } + else if (c > 127) { if (counter > last) { _out.write(chars, last, counter - last); } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFCell.java b/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFCell.java index 119a51ee5..638dbe20d 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFCell.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFCell.java @@ -21,6 +21,8 @@ package org.apache.poi.xssf.streaming; import org.apache.poi.ss.usermodel.*; import org.apache.poi.xssf.SXSSFITestDataProvider; +import org.apache.poi.xssf.XSSFITestDataProvider; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; /** * @@ -66,12 +68,40 @@ public class TestSXSSFCell extends BaseTestCell { Sheet sh = wb.createSheet(); Row row = sh.createRow(0); Cell cell = row.createCell(0); - String sval = "<>\t\r\n\u00a0 &\"POI\'\u2122"; + String sval = "\u0000\u0002\u0012<>\t\n\u00a0 &\"POI\'\u2122"; cell.setCellValue(sval); wb = _testDataProvider.writeOutAndReadBack(wb); - assertEquals(sval, wb.getSheetAt(0).getRow(0).getCell(0).getStringCellValue()); + // invalid characters are replaced with question marks + assertEquals("???<>\t\n\u00a0 &\"POI\'\u2122", wb.getSheetAt(0).getRow(0).getCell(0).getStringCellValue()); + + } + + public void testEncodingbeloAscii(){ + Workbook xwb = new XSSFWorkbook(); + Cell xCell = xwb.createSheet().createRow(0).createCell(0); + + Workbook swb = new SXSSFWorkbook(); + Cell sCell = swb.createSheet().createRow(0).createCell(0); + + StringBuffer sb = new StringBuffer(); + // test all possible characters + for(int i = 0; i < Character.MAX_VALUE; i++) sb.append((char)i) ; + + String str = sb.toString(); + + xCell.setCellValue(str); + assertEquals(str, xCell.getStringCellValue()); + sCell.setCellValue(str); + assertEquals(str, sCell.getStringCellValue()); + + xwb = XSSFITestDataProvider.instance.writeOutAndReadBack(xwb); + swb = SXSSFITestDataProvider.instance.writeOutAndReadBack(swb); + xCell = xwb.getSheetAt(0).createRow(0).createCell(0); + sCell = swb.getSheetAt(0).createRow(0).createCell(0); + + assertEquals(xCell.getStringCellValue(), sCell.getStringCellValue()); } }