Bug 53130 - SXSSF Shared Strings option support, to make generated xlsx files compatible with Google Docs or iPad

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1568539 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2014-02-14 22:45:05 +00:00
parent 92a488a28b
commit a0831ba0f9
4 changed files with 129 additions and 12 deletions

View File

@ -29,6 +29,8 @@ import java.io.Writer;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream; import java.util.zip.GZIPOutputStream;
import org.apache.poi.xssf.model.SharedStringsTable;
/** /**
* Sheet writer that supports gzip compression of the temp files. * Sheet writer that supports gzip compression of the temp files.
*/ */
@ -38,6 +40,13 @@ public class GZIPSheetDataWriter extends SheetDataWriter {
super(); super();
} }
/**
* Creates a gzip-compressing sheet writer and hands the given shared strings
* table on to the {@link SheetDataWriter} constructor.
*
* @param sharedStringsTable the shared strings table, or null if inline text is used
* @throws IOException propagated from the superclass constructor
*/
public GZIPSheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException {
super(sharedStringsTable);
}
/** /**
* @return temp file to write sheet data * @return temp file to write sheet data
*/ */

View File

@ -43,12 +43,21 @@ import java.util.zip.ZipEntry;
import org.apache.poi.ss.formula.udf.UDFFinder; import org.apache.poi.ss.formula.udf.UDFFinder;
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
import org.apache.poi.ss.util.CellRangeAddress; import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.model.SharedStringsTable;
/** /**
* Streaming version of XSSFWorkbook implementing the "BigGridDemo" strategy. * Streaming version of XSSFWorkbook implementing the "BigGridDemo" strategy.
* *
* @author Alex Geller, Four J's Development Tools * SXSSFWorkbook defaults to using inline strings instead of a shared strings
*/ * table. This is very efficient, since no document content needs to be kept in
* memory, but is also known to produce documents that are incompatible with
* some clients. With shared strings enabled, all unique strings in the document
* have to be kept in memory. Depending on your document content this could use
* a lot more resources than with shared strings disabled.
*
* Carefully review your memory budget and compatibility needs before deciding
* whether to enable shared strings or not.
*/
public class SXSSFWorkbook implements Workbook public class SXSSFWorkbook implements Workbook
{ {
/** /**
@ -72,6 +81,11 @@ public class SXSSFWorkbook implements Workbook
*/ */
private boolean _compressTmpFiles = false; private boolean _compressTmpFiles = false;
/**
* shared string table - a cache of strings in this workbook
*/
private SharedStringsTable _sharedStringSource = null;
/** /**
* Construct a new workbook * Construct a new workbook
*/ */
@ -165,15 +179,48 @@ public class SXSSFWorkbook implements Workbook
* @param compressTmpFiles whether to use gzip compression for temporary files * @param compressTmpFiles whether to use gzip compression for temporary files
*/ */
public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles){ public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles){
this(workbook,rowAccessWindowSize, compressTmpFiles, false);
}
/**
* Constructs a workbook from an existing workbook.
* <p>
* When a new node is created via createRow() and the total number
* of unflushed records would exceed the specified value, then the
* row with the lowest index value is flushed and cannot be accessed
* via getRow() anymore.
* </p>
* <p>
* A value of -1 indicates unlimited access. In this case all
* records that have not been flushed by a call to flush() are available
* for random access.
* </p>
* <p>
* A value of 0 is not allowed because it would flush any newly created row
* without having a chance to specify any cells.
* </p>
*
* @param workbook the template workbook
* @param rowAccessWindowSize the number of unflushed rows kept accessible; -1 means unlimited, 0 is not allowed
* @param compressTmpFiles whether to use gzip compression for temporary files
* @param useSharedStringsTable whether to use a shared strings table
*/
public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles, boolean useSharedStringsTable){
setRandomAccessWindowSize(rowAccessWindowSize); setRandomAccessWindowSize(rowAccessWindowSize);
setCompressTempFiles(compressTmpFiles); setCompressTempFiles(compressTmpFiles);
if (workbook == null) if (workbook == null)
{ {
_wb=new XSSFWorkbook(); _wb=new XSSFWorkbook();
if(useSharedStringsTable){
_sharedStringSource = _wb.getSharedStringSource();
}
} }
else else
{ {
_wb=workbook; _wb=workbook;
if(useSharedStringsTable){
_sharedStringSource = _wb.getSharedStringSource();
}
for ( int i = 0; i < _wb.getNumberOfSheets(); i++ ) for ( int i = 0; i < _wb.getNumberOfSheets(); i++ )
{ {
XSSFSheet sheet = _wb.getSheetAt( i ); XSSFSheet sheet = _wb.getSheetAt( i );
@ -236,9 +283,9 @@ public class SXSSFWorkbook implements Workbook
SheetDataWriter createSheetDataWriter() throws IOException { SheetDataWriter createSheetDataWriter() throws IOException {
if(_compressTmpFiles) { if(_compressTmpFiles) {
return new GZIPSheetDataWriter(); return new GZIPSheetDataWriter(_sharedStringSource);
} else { } else {
return new SheetDataWriter(); return new SheetDataWriter(_sharedStringSource);
} }
} }

View File

@ -32,6 +32,9 @@ import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle; import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.FormulaError; import org.apache.poi.ss.usermodel.FormulaError;
import org.apache.poi.ss.util.CellReference; import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
/** /**
* Initially copied from BigGridDemo "SpreadsheetWriter". * Initially copied from BigGridDemo "SpreadsheetWriter".
@ -48,11 +51,21 @@ public class SheetDataWriter {
private int _numberOfCellsOfLastFlushedRow; // meaningful only of _numberOfFlushedRows>0 private int _numberOfCellsOfLastFlushedRow; // meaningful only of _numberOfFlushedRows>0
private int _numberLastFlushedRow = -1; // meaningful only of _numberOfFlushedRows>0 private int _numberLastFlushedRow = -1; // meaningful only of _numberOfFlushedRows>0
/**
* Table of strings shared across this workbook.
* If two cells contain the same string, then the cell value is the same index into SharedStringsTable
*/
private SharedStringsTable _sharedStringSource;
public SheetDataWriter() throws IOException { public SheetDataWriter() throws IOException {
_fd = createTempFile(); _fd = createTempFile();
_out = createWriter(_fd); _out = createWriter(_fd);
} }
/**
 * Creates a writer through the no-argument constructor (which sets up the
 * temp file and its writer) and then records the shared strings table to use.
 * <p>
 * When the table is non-null, string cells are emitted as references into it;
 * when it is null, string cells are written as inline strings.
 * </p>
 *
 * @param sharedStringsTable the shared strings table, or null to write inline strings
 * @throws IOException if the no-argument constructor fails
 */
public SheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException {
    this();
    _sharedStringSource = sharedStringsTable;
}
/** /**
* Create a temp file to write sheet data. * Create a temp file to write sheet data.
* By default, temp files are created in the default temporary-file directory * By default, temp files are created in the default temporary-file directory
@ -196,14 +209,24 @@ public class SheetDataWriter {
break; break;
} }
case Cell.CELL_TYPE_STRING: { case Cell.CELL_TYPE_STRING: {
_out.write(" t=\"inlineStr\">"); if (_sharedStringSource != null) {
_out.write("<is><t"); XSSFRichTextString rt = new XSSFRichTextString(cell.getStringCellValue());
if(hasLeadingTrailingSpaces(cell.getStringCellValue())) { int sRef = _sharedStringSource.addEntry(rt.getCTRst());
_out.write(" xml:space=\"preserve\"");
_out.write(" t=\"" + STCellType.S.toString() + "\">");
_out.write("<v>");
_out.write(String.valueOf(sRef));
_out.write("</v>");
} else {
_out.write(" t=\"inlineStr\">");
_out.write("<is><t");
if (hasLeadingTrailingSpaces(cell.getStringCellValue())) {
_out.write(" xml:space=\"preserve\"");
}
_out.write(">");
outputQuotedString(cell.getStringCellValue());
_out.write("</t></is>");
} }
_out.write(">");
outputQuotedString(cell.getStringCellValue());
_out.write("</t></is>");
break; break;
} }
case Cell.CELL_TYPE_NUMERIC: { case Cell.CELL_TYPE_NUMERIC: {
@ -245,7 +268,7 @@ public class SheetDataWriter {
} }
//Taken from jdk1.3/src/javax/swing/text/html/HTMLWriter.java //Taken from jdk1.3/src/javax/swing/text/html/HTMLWriter.java
protected void outputQuotedString(String s) throws IOException { protected void outputQuotedString(String s) throws IOException {
if (s == null || s.length() == 0) { if (s == null || s.length() == 0) {
return; return;
} }

View File

@ -23,6 +23,7 @@ import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.reflect.Field;
import org.apache.poi.ss.usermodel.BaseTestWorkbook; import org.apache.poi.ss.usermodel.BaseTestWorkbook;
import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Cell;
@ -32,6 +33,7 @@ import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference; import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.SXSSFITestDataProvider; import org.apache.poi.xssf.SXSSFITestDataProvider;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public final class TestSXSSFWorkbook extends BaseTestWorkbook { public final class TestSXSSFWorkbook extends BaseTestWorkbook {
@ -90,6 +92,42 @@ public final class TestSXSSFWorkbook extends BaseTestWorkbook {
} }
/**
 * Verifies that a streaming workbook constructed with
 * {@code useSharedStringsTable = true} stores each distinct string once in
 * its shared strings table, and that the cell values survive a write-out and
 * read-back as a regular {@link XSSFWorkbook}.
 *
 * @throws Exception if the reflective field access or the round trip fails
 */
public void testUseSharedStringsTable() throws Exception {
    SXSSFWorkbook wb = new SXSSFWorkbook(null, 10, false, true);
    XSSFWorkbook xssfWorkbook;
    try {
        // The table is held in a private field, so the test reads it reflectively.
        Field f = SXSSFWorkbook.class.getDeclaredField("_sharedStringSource");
        f.setAccessible(true);
        SharedStringsTable sss = (SharedStringsTable) f.get(wb);
        assertNotNull(sss);

        Row row = wb.createSheet("S1").createRow(0);
        row.createCell(0).setCellValue("A");
        row.createCell(1).setCellValue("B");
        row.createCell(2).setCellValue("A");

        xssfWorkbook = (XSSFWorkbook) SXSSFITestDataProvider.instance.writeOutAndReadBack(wb);

        // "A" is written twice but must occupy a single shared-strings entry.
        sss = (SharedStringsTable) f.get(wb);
        assertEquals(2, sss.getUniqueCount());
    } finally {
        // Always dispose of the streaming workbook, even when a statement above
        // fails, so a failing run does not skip its cleanup.
        wb.dispose();
    }

    Sheet sheet1 = xssfWorkbook.getSheetAt(0);
    assertEquals("S1", sheet1.getSheetName());
    assertEquals(1, sheet1.getPhysicalNumberOfRows());

    Row readRow = sheet1.getRow(0);
    assertNotNull(readRow);

    Cell cell = readRow.getCell(0);
    assertNotNull(cell);
    assertEquals("A", cell.getStringCellValue());

    cell = readRow.getCell(1);
    assertNotNull(cell);
    assertEquals("B", cell.getStringCellValue());

    cell = readRow.getCell(2);
    assertNotNull(cell);
    assertEquals("A", cell.getStringCellValue());
}
public void testAddToExistingWorkbook() { public void testAddToExistingWorkbook() {
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(); XSSFWorkbook xssfWorkbook = new XSSFWorkbook();
xssfWorkbook.createSheet("S1"); xssfWorkbook.createSheet("S1");