diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestUnfixedBugs.java b/src/testcases/org/apache/poi/hssf/usermodel/TestUnfixedBugs.java
index 9a2dc5dfb..0ebb78e37 100644
--- a/src/testcases/org/apache/poi/hssf/usermodel/TestUnfixedBugs.java
+++ b/src/testcases/org/apache/poi/hssf/usermodel/TestUnfixedBugs.java
@@ -17,13 +17,22 @@
 
 package org.apache.poi.hssf.usermodel;
 
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+
 import junit.framework.AssertionFailedError;
 import junit.framework.TestCase;
 
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.hssf.record.RecordFormatException;
-
-import java.io.IOException;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.SXSSFITestDataProvider;
+import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.apache.poi.xssf.streaming.SXSSFWorkbook;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 
 /**
  * @author aviks
@@ -77,4 +86,43 @@ public final class TestUnfixedBugs extends TestCase {
 
         assertEquals("evaluating e1", 30., eval.evaluate(e1).getNumberValue());
     }
+
+    /**
+     * Bug 54084: text made up of characters beyond the Basic Multilingual
+     * Plane must read back unchanged from the sample workbook, after an XSSF
+     * write/read round-trip and after an SXSSF (streaming) write/read round-trip.
+     */
+    public void testBug54084Unicode() throws IOException {
+        // sample XLSX holding the same text as the file "54084 - Greek - beyond BMP.txt"
+        XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("54084 - Greek - beyond BMP.xlsx");
+        verifyBug54084Unicode(wb);
+
+        // now write the file and read it back in
+        XSSFWorkbook wbWritten = XSSFTestDataSamples.writeOutAndReadBack(wb);
+        verifyBug54084Unicode(wbWritten);
+
+        // finally also write it out via the streaming interface and verify that we still can read it back in
+        Workbook wbStreamingWritten = SXSSFITestDataProvider.instance.writeOutAndReadBack(new SXSSFWorkbook(wb));
+        verifyBug54084Unicode(wbStreamingWritten);
+    }
+
+    /**
+     * Asserts that the first cell of the first row on the first sheet holds
+     * exactly the text stored in "54084 - Greek - beyond BMP.txt".
+     *
+     * @param wb the workbook to check
+     * @throws UnsupportedEncodingException declared by the String constructor that takes a charset name
+     */
+    private void verifyBug54084Unicode(Workbook wb) throws UnsupportedEncodingException {
+        // expected data is stored in UTF-8 in a text-file; trim() strips surrounding whitespace such as a trailing line break
+        String testData = new String(HSSFTestDataSamples.getTestDataFileContent("54084 - Greek - beyond BMP.txt"), "UTF-8").trim();
+
+        Sheet sheet = wb.getSheetAt(0);
+        Row row = sheet.getRow(0);
+        assertNotNull("Row 0 is missing on the first sheet", row);
+        Cell cell = row.getCell(0);
+        assertNotNull("Cell 0 is missing in row 0 of the first sheet", cell);
+
+        assertEquals("The data in the text-file should exactly match the data that we read from the workbook", testData, cell.getStringCellValue());
+    }
 }
diff --git a/test-data/spreadsheet/54084 - Greek - beyond BMP.txt b/test-data/spreadsheet/54084 - Greek - beyond BMP.txt
new file mode 100644
index 000000000..11523527b
--- /dev/null
+++ b/test-data/spreadsheet/54084 - Greek - beyond BMP.txt
@@ -0,0 +1 @@
+𝝊𝝋𝝌𝝍𝝎𝝏𝝐𝝑𝝒𝝓𝝔𝝕𝝖𝝗𝝘𝝙𝝚𝝛𝝜𝝝𝝞𝝟𝝠𝝡𝝢𝝣𝝤𝝥𝝦𝝧𝝨𝝩𝝪𝝫𝝬𝝭𝝮𝝯𝝰𝝱𝝲𝝳𝝴𝝵𝝶𝝷𝝸𝝹𝝺
diff --git a/test-data/spreadsheet/54084 - Greek - beyond BMP.xlsx b/test-data/spreadsheet/54084 - Greek - beyond BMP.xlsx
new file mode 100644
index 000000000..bc2772286
Binary files /dev/null and b/test-data/spreadsheet/54084 - Greek - beyond BMP.xlsx differ