SXSSF documentation and polishing
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1133372 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b746022da0
commit
93c599be72
@ -58,6 +58,7 @@
|
|||||||
<li><link href="#event_api">Event API (HSSF Only)</link></li>
|
<li><link href="#event_api">Event API (HSSF Only)</link></li>
|
||||||
<li><link href="#record_aware_event_api">Event API with extensions to be Record Aware (HSSF Only)</link></li>
|
<li><link href="#record_aware_event_api">Event API with extensions to be Record Aware (HSSF Only)</link></li>
|
||||||
<li><link href="#xssf_sax_api">XSSF and SAX (Event API)</link></li>
|
<li><link href="#xssf_sax_api">XSSF and SAX (Event API)</link></li>
|
||||||
|
<li><link href="#sxssf">SXSSF (Streaming User API)</link></li>
|
||||||
<li><link href="#low_level_api">Low Level API</link></li>
|
<li><link href="#low_level_api">Low Level API</link></li>
|
||||||
</ul>
|
</ul>
|
||||||
</section>
|
</section>
|
||||||
@ -620,6 +621,74 @@ public class ExampleEventUserModel {
|
|||||||
howto.processAllSheets(args[0]);
|
howto.processAllSheets(args[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
]]></source>
|
||||||
|
</section>
|
||||||
|
<anchor id="sxssf"/>
|
||||||
|
<section><title>SXSSF (Streaming Usermodel API)</title>
|
||||||
|
<p>
|
||||||
|
SXSSF is an API-compatible streaming extension of XSSF to be used when
|
||||||
|
very large spreadsheets have to be produced, and heap space is limited.
|
||||||
|
SXSSF achieves its low memory footprint by limiting access to the rows that
|
||||||
|
are within a sliding window, while XSSF gives access to all rows in the
|
||||||
|
document. Older rows that are no longer in the window become inaccessible,
|
||||||
|
as they are written to the disk.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
When a new row is created via createRow() and the total number
|
||||||
|
of unflushed records would exceed the specified window size, then the
|
||||||
|
row with the lowest index value is flushed and cannot be accessed
|
||||||
|
via getRow() anymore.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
A value of -1 indicates unlimited access. In this case all
|
||||||
|
records that have not been flushed by a call to flush() are available
|
||||||
|
for random access.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<source><![CDATA[
|
||||||
|
package com.dinom.excel;
|
||||||
|
|
||||||
|
import org.apache.poi.ss.SpreadsheetVersion;
|
||||||
|
import org.apache.poi.ss.usermodel.Cell;
|
||||||
|
import org.apache.poi.ss.usermodel.Row;
|
||||||
|
import org.apache.poi.ss.usermodel.Sheet;
|
||||||
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
|
import org.apache.poi.ss.util.CellReference;
|
||||||
|
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
|
||||||
|
|
||||||
|
public class Main {
|
||||||
|
public static void main(String[] args) throws Throwable {
|
||||||
|
Workbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory
|
||||||
|
Sheet sh = wb.createSheet();
|
||||||
|
for(int rownum = 0; rownum < 100000; rownum++){
|
||||||
|
Row row = sh.createRow(rownum);
|
||||||
|
for(int cellnum = 0; cellnum < 1000; cellnum++){
|
||||||
|
Cell cell = row.createCell(cellnum);
|
||||||
|
String address = new CellReference(cell).formatAsString();
|
||||||
|
cell.setCellValue(address);
|
||||||
|
}
|
||||||
|
|
||||||
|
// previous row is within the window and accessible
|
||||||
|
Row prev = sh.getRow(rownum - 1);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// attempt to access flushed rows results in an exception:
|
||||||
|
try {
|
||||||
|
Row firstRow = sh.getRow(0);
|
||||||
|
} catch (Exception e){
|
||||||
|
System.out.println("cannot access flushed rows");
|
||||||
|
}
|
||||||
|
|
||||||
|
FileOutputStream out = new FileOutputStream("/temp/sxssf.xlsx");
|
||||||
|
wb.write(out);
|
||||||
|
out.close();
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
]]></source>
|
]]></source>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
@ -73,6 +73,42 @@
|
|||||||
the memory footprint for processing them is higher than for the
|
the memory footprint for processing them is higher than for the
|
||||||
older HSSF supported (.xls) binary files.
|
older HSSF supported (.xls) binary files.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section>
|
||||||
|
<title>SXSSF (Since POI 3.8 beta3)</title>
|
||||||
|
<p>Since 3.8-beta3, POI provides a low-memory footprint SXSSF API built on top of XSSF.</p>
|
||||||
|
<p>
|
||||||
|
SXSSF is an API-compatible streaming extension of XSSF to be used when
|
||||||
|
very large spreadsheets have to be produced, and heap space is limited.
|
||||||
|
SXSSF achieves its low memory footprint by limiting access to the rows that
|
||||||
|
are within a sliding window, while XSSF gives access to all rows in the
|
||||||
|
document. Older rows that are no longer in the window become inaccessible,
|
||||||
|
as they are written to the disk.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
In auto-flush mode the size of the access window can be specified, to hold a certain number of rows in memory. When that value is reached, the creation of an additional row causes the row with the lowest index to be removed from the access window and written to disk. Or, the window size can be set to grow dynamically; it can be trimmed periodically by an explicit call to flush(int keepRows) as needed.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Due to the streaming nature of the implementation, there are the following
|
||||||
|
limitations when compared to XSSF:
|
||||||
|
</p>
|
||||||
|
<ul>
|
||||||
|
<li>Only a limited number of rows are accessible at a point in time.</li>
|
||||||
|
<li>Sheet.clone() is not supported.</li>
|
||||||
|
<li>Formula evaluation is not supported.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>The table below synopsizes the comparative features of POI's Spreadsheet API:</p>
|
||||||
|
<p><em>Spreadsheet API Feature Summary</em></p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<img src="../resources/images/ss-features.png" alt="Spreadsheet API Feature Summary"/>
|
||||||
|
</p>
|
||||||
|
</section>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</document>
|
</document>
|
||||||
|
BIN
src/documentation/resources/images/ss-features.png
Normal file
BIN
src/documentation/resources/images/ss-features.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 13 KiB |
@ -36,7 +36,18 @@ import org.apache.poi.xssf.usermodel.*;
|
|||||||
* 2. create an application that streams data in a text file
|
* 2. create an application that streams data in a text file
|
||||||
* 3. Substitute the sheet in the template with the generated data
|
* 3. Substitute the sheet in the template with the generated data
|
||||||
*
|
*
|
||||||
|
* <p>
|
||||||
|
* Since 3.8-beta3, POI provides a low-memory footprint SXSSF API which implements the "BigGridDemo" strategy.
|
||||||
|
* SXSSF is an API-compatible streaming extension of XSSF to be used when
|
||||||
|
* very large spreadsheets have to be produced, and heap space is limited.
|
||||||
|
* SXSSF achieves its low memory footprint by limiting access to the rows that
|
||||||
|
* are within a sliding window, while XSSF gives access to all rows in the
|
||||||
|
* document. Older rows that are no longer in the window become inaccessible,
|
||||||
|
* as they are written to the disk.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
* @author Yegor Kozlov
|
* @author Yegor Kozlov
|
||||||
|
* @see <a href="http://poi.apache.org/spreadsheet/how-to.html#sxssf">http://poi.apache.org/spreadsheet/how-to.html#sxssf</a>
|
||||||
*/
|
*/
|
||||||
public class BigGridDemo {
|
public class BigGridDemo {
|
||||||
private static final String XML_ENCODING = "UTF-8";
|
private static final String XML_ENCODING = "UTF-8";
|
||||||
|
@ -17,15 +17,10 @@
|
|||||||
|
|
||||||
package org.apache.poi.xssf.streaming;
|
package org.apache.poi.xssf.streaming;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.io.Writer;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
|
|
||||||
import org.apache.poi.ss.usermodel.*;
|
import org.apache.poi.ss.usermodel.*;
|
||||||
import org.apache.poi.ss.util.CellReference;
|
import org.apache.poi.ss.util.CellReference;
|
||||||
@ -46,7 +41,7 @@ public class SXSSFSheet implements Sheet, Cloneable
|
|||||||
XSSFSheet _sh;
|
XSSFSheet _sh;
|
||||||
TreeMap<Integer,SXSSFRow> _rows=new TreeMap<Integer,SXSSFRow>();
|
TreeMap<Integer,SXSSFRow> _rows=new TreeMap<Integer,SXSSFRow>();
|
||||||
SheetDataWriter _writer;
|
SheetDataWriter _writer;
|
||||||
int _randomAccessWindowSize=5000;
|
int _randomAccessWindowSize = SXSSFWorkbook.DEFAULT_WINDOW_SIZE;
|
||||||
|
|
||||||
public SXSSFSheet(SXSSFWorkbook workbook,XSSFSheet xSheet) throws IOException
|
public SXSSFSheet(SXSSFWorkbook workbook,XSSFSheet xSheet) throws IOException
|
||||||
{
|
{
|
||||||
@ -1243,7 +1238,7 @@ public class SXSSFSheet implements Sheet, Cloneable
|
|||||||
{
|
{
|
||||||
_fd = File.createTempFile("sheet", ".xml");
|
_fd = File.createTempFile("sheet", ".xml");
|
||||||
_fd.deleteOnExit();
|
_fd.deleteOnExit();
|
||||||
_out = new FileWriter(_fd);
|
_out = new BufferedWriter(new FileWriter(_fd));
|
||||||
_out.write("<sheetData>\n");
|
_out.write("<sheetData>\n");
|
||||||
}
|
}
|
||||||
public int getNumberOfFlushedRows()
|
public int getNumberOfFlushedRows()
|
||||||
@ -1265,6 +1260,7 @@ public class SXSSFSheet implements Sheet, Cloneable
|
|||||||
public InputStream getWorksheetXMLInputStream() throws IOException
|
public InputStream getWorksheetXMLInputStream() throws IOException
|
||||||
{
|
{
|
||||||
_out.write("</sheetData>");
|
_out.write("</sheetData>");
|
||||||
|
_out.flush();
|
||||||
_out.close();
|
_out.close();
|
||||||
return new FileInputStream(_fd);
|
return new FileInputStream(_fd);
|
||||||
}
|
}
|
||||||
|
@ -50,11 +50,56 @@ import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
|
|||||||
*/
|
*/
|
||||||
public class SXSSFWorkbook implements Workbook
|
public class SXSSFWorkbook implements Workbook
|
||||||
{
|
{
|
||||||
|
/**
|
||||||
|
* Specifies how many rows can be accessed at most via getRow().
|
||||||
|
* When a new row is created via createRow() and the total number
|
||||||
|
* of unflushed records would exceed the specified value, then the
|
||||||
|
* row with the lowest index value is flushed and cannot be accessed
|
||||||
|
* via getRow() anymore.
|
||||||
|
*/
|
||||||
|
public static final int DEFAULT_WINDOW_SIZE = 100;
|
||||||
|
|
||||||
XSSFWorkbook _wb=new XSSFWorkbook();
|
XSSFWorkbook _wb=new XSSFWorkbook();
|
||||||
|
|
||||||
HashMap<SXSSFSheet,XSSFSheet> _sxFromXHash=new HashMap<SXSSFSheet,XSSFSheet>();
|
HashMap<SXSSFSheet,XSSFSheet> _sxFromXHash=new HashMap<SXSSFSheet,XSSFSheet>();
|
||||||
HashMap<XSSFSheet,SXSSFSheet> _xFromSxHash=new HashMap<XSSFSheet,SXSSFSheet>();
|
HashMap<XSSFSheet,SXSSFSheet> _xFromSxHash=new HashMap<XSSFSheet,SXSSFSheet>();
|
||||||
|
|
||||||
|
int _randomAccessWindowSize = DEFAULT_WINDOW_SIZE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a new workbook
|
||||||
|
*/
|
||||||
|
public SXSSFWorkbook(){
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct an empty workbook and specify the window for row access.
|
||||||
|
* <p>
|
||||||
|
* When a new row is created via createRow() and the total number
|
||||||
|
* of unflushed records would exceed the specified value, then the
|
||||||
|
* row with the lowest index value is flushed and cannot be accessed
|
||||||
|
* via getRow() anymore.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* A value of -1 indicates unlimited access. In this case all
|
||||||
|
* records that have not been flushed by a call to flush() are available
|
||||||
|
* for random access.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* A value of 0 is not allowed because it would flush any newly created row
|
||||||
|
* without having a chance to specify any cells.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
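* @param rowAccessWindowSize the maximum number of rows accessible via getRow(); must be greater than 0, or -1 for unlimited access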
|
||||||
|
*/
|
||||||
|
public SXSSFWorkbook(int rowAccessWindowSize){
|
||||||
|
if(rowAccessWindowSize == 0 || rowAccessWindowSize < -1) {
|
||||||
|
throw new IllegalArgumentException("rowAccessWindowSize must be greater than 0 or -1");
|
||||||
|
}
|
||||||
|
_randomAccessWindowSize = rowAccessWindowSize;
|
||||||
|
}
|
||||||
|
|
||||||
XSSFSheet getXSSFSheet(SXSSFSheet sheet)
|
XSSFSheet getXSSFSheet(SXSSFSheet sheet)
|
||||||
{
|
{
|
||||||
XSSFSheet result=_sxFromXHash.get(sheet);
|
XSSFSheet result=_sxFromXHash.get(sheet);
|
||||||
@ -303,6 +348,7 @@ public class SXSSFWorkbook implements Workbook
|
|||||||
{
|
{
|
||||||
throw new RuntimeException(ioe);
|
throw new RuntimeException(ioe);
|
||||||
}
|
}
|
||||||
|
sxSheet.setRandomAccessWindowSize(_randomAccessWindowSize);
|
||||||
registerSheetMapping(sxSheet,xSheet);
|
registerSheetMapping(sxSheet,xSheet);
|
||||||
return sxSheet;
|
return sxSheet;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user