From 6216c511a952b455396576617663be37a11d6060 Mon Sep 17 00:00:00 2001
From: Yegor Kozlov
Date: Mon, 30 Nov 2009 14:09:03 +0000
Subject: [PATCH] memory usage optimization in XSSF - avoid creating parentless
xml beans
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@885429 13f79535-47bb-0310-9956-ffa450edef68
---
src/documentation/content/xdocs/status.xml | 1 +
.../poi/xssf/model/SharedStringsTable.java | 20 +-
.../apache/poi/xssf/usermodel/XSSFRow.java | 19 +-
.../apache/poi/xssf/usermodel/XSSFSheet.java | 13 +-
.../poi/xssf/usermodel/XSSFWorkbook.java | 9 +-
.../helpers/RichTextStringHelper.java | 56 -----
.../poi/xssf/usermodel/TestXSSFSheet.java | 19 +-
.../org/apache/poi/xssf/util/MemoryUsage.java | 197 ++++++++++++++++++
8 files changed, 260 insertions(+), 74 deletions(-)
delete mode 100644 src/ooxml/java/org/apache/poi/xssf/usermodel/helpers/RichTextStringHelper.java
create mode 100755 src/ooxml/testcases/org/apache/poi/xssf/util/MemoryUsage.java
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 9109c2b37..26d634124 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
+ memory usage optimization in XSSF - avoid creating parentless xml beans
47188 - avoid corruption of workbook when adding cell comments
48106 - improved work with cell comments in XSSF
Add support for creating SummaryInformation and DocumentSummaryInformation properties
diff --git a/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java
index 8287df55b..ac648c142 100644
--- a/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java
+++ b/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java
@@ -84,8 +84,12 @@ public class SharedStringsTable extends POIXMLDocumentPart {
*/
private int uniqueCount;
+ private SstDocument _sstDoc;
+
public SharedStringsTable() {
super();
+ _sstDoc = SstDocument.Factory.newInstance();
+ _sstDoc.addNewSst();
}
public SharedStringsTable(PackagePart part, PackageRelationship rel) throws IOException {
@@ -102,7 +106,8 @@ public class SharedStringsTable extends POIXMLDocumentPart {
public void readFrom(InputStream is) throws IOException {
try {
int cnt = 0;
- CTSst sst = SstDocument.Factory.parse(is).getSst();
+ _sstDoc = SstDocument.Factory.parse(is);
+ CTSst sst = _sstDoc.getSst();
count = (int)sst.getCount();
uniqueCount = (int)sst.getUniqueCount();
for (CTRst st : sst.getSiArray()) {
@@ -163,10 +168,14 @@ public class SharedStringsTable extends POIXMLDocumentPart {
if (stmap.containsKey(s)) {
return stmap.get(s);
}
+
uniqueCount++;
+ //create a CTRst bean attached to this SstDocument and copy the argument CTRst into it
+ CTRst newSt = _sstDoc.getSst().addNewSi();
+ newSt.set(st);
int idx = strings.size();
stmap.put(s, idx);
- strings.add(st);
+ strings.add(newSt);
return idx;
}
/**
@@ -188,14 +197,11 @@ public class SharedStringsTable extends POIXMLDocumentPart {
XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS);
//re-create the sst table every time saving a workbook
- SstDocument doc = SstDocument.Factory.newInstance();
- CTSst sst = doc.addNewSst();
+ CTSst sst = _sstDoc.getSst();
sst.setCount(count);
sst.setUniqueCount(uniqueCount);
- CTRst[] ctr = strings.toArray(new CTRst[strings.size()]);
- sst.setSiArray(ctr);
- doc.save(out, options);
+ _sstDoc.save(out, options);
}
@Override
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRow.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRow.java
index d81a69240..91bb3e4c8 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRow.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRow.java
@@ -35,7 +35,7 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
* High level representation of a row of a spreadsheet.
*/
public class XSSFRow implements Row, Comparable {
- private static final POILogger logger = POILogFactory.getLogger(XSSFRow.class);
+ private static final POILogger _logger = POILogFactory.getLogger(XSSFRow.class);
/**
* the xml bean containing all cell definitions for this row
@@ -46,7 +46,7 @@ public class XSSFRow implements Row, Comparable {
* Cells of this row keyed by their column indexes.
* The TreeMap ensures that the cells are ordered by columnIndex in the ascending order.
*/
- private final TreeMap _cells;
+ private final TreeMap _cells;
/**
* the parent sheet
@@ -62,7 +62,7 @@ public class XSSFRow implements Row, Comparable {
protected XSSFRow(CTRow row, XSSFSheet sheet) {
_row = row;
_sheet = sheet;
- _cells = new TreeMap();
+ _cells = new TreeMap();
for (CTCell c : row.getCArray()) {
XSSFCell cell = new XSSFCell(this, c);
_cells.put(cell.getColumnIndex(), cell);
@@ -91,7 +91,7 @@ public class XSSFRow implements Row, Comparable {
* @return an iterator over cells in this row.
*/
public Iterator cellIterator() {
- return _cells.values().iterator();
+ return (Iterator)(Iterator extends Cell>)_cells.values().iterator();
}
/**
@@ -160,8 +160,15 @@ public class XSSFRow implements Row, Comparable {
* @see Cell#CELL_TYPE_STRING
*/
public XSSFCell createCell(int columnIndex, int type) {
- CTCell ctcell = CTCell.Factory.newInstance();
- XSSFCell xcell = new XSSFCell(this, ctcell);
+ CTCell ctCell;
+ XSSFCell prev = _cells.get(columnIndex);
+ if(prev != null){
+ ctCell = prev.getCTCell();
+ ctCell.set(CTCell.Factory.newInstance());
+ } else {
+ ctCell = _row.addNewC();
+ }
+ XSSFCell xcell = new XSSFCell(this, ctCell);
xcell.setCellNum(columnIndex);
if (type != Cell.CELL_TYPE_BLANK) {
xcell.setCellType(type);
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java
index 2de3c16b0..318f2f6dd 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java
@@ -69,8 +69,10 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
public class XSSFSheet extends POIXMLDocumentPart implements Sheet {
private static final POILogger logger = POILogFactory.getLogger(XSSFSheet.class);
+ //TODO make the two variables below private!
protected CTSheet sheet;
protected CTWorksheet worksheet;
+
private TreeMap rows;
private List hyperlinks;
private ColumnHelper columnHelper;
@@ -422,10 +424,17 @@ public class XSSFSheet extends POIXMLDocumentPart implements Sheet {
* @see #removeRow(org.apache.poi.ss.usermodel.Row)
*/
public XSSFRow createRow(int rownum) {
- CTRow ctRow = CTRow.Factory.newInstance();
+ CTRow ctRow;
+ XSSFRow prev = rows.get(rownum);
+ if(prev != null){
+ ctRow = prev.getCTRow();
+ ctRow.set(CTRow.Factory.newInstance());
+ } else {
+ ctRow = worksheet.getSheetData().addNewRow();
+ }
XSSFRow r = new XSSFRow(ctRow, this);
r.setRowNum(rownum);
- rows.put(r.getRowNum(), r);
+ rows.put(rownum, r);
return r;
}
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java
index 1de055db3..9af7515af 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java
@@ -148,6 +148,12 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable
+ * cells in even columns are numbers, cells in odd columns are strings
+ * | |
+ *
+ * @param wb the workbook to write to
+ * @param numCols the number of columns in a row
+ */
+    public static void mixedSpreadsheet(Workbook wb, int numCols){
+        // Report which Workbook implementation is being measured and the JVM total memory up front.
+        System.out.println("Testing " + wb.getClass().getName());
+        long totalMbBefore = Runtime.getRuntime().totalMemory()/(1024*1024);
+        System.out.println("Memory: " + totalMbBefore + "MB");
+
+        // Both counters live outside the try block so the catch clause can report how far the run got.
+        int rowIndex = 0;
+        int cellCount = 0;
+        try {
+            Sheet sheet = wb.createSheet();
+            // Deliberately unbounded: rows are added until an OutOfMemoryError is thrown.
+            while (true) {
+                Row row = sheet.createRow(rowIndex);
+                for (int col = 0; col < numCols; col++) {
+                    Cell cell = row.createCell(col);
+                    if (col % 2 != 0) {
+                        // odd column: string value built from a cell reference
+                        String text = new CellReference(col, rowIndex).formatAsString();
+                        cell.setCellValue(text);
+                    } else {
+                        // even column: numeric value
+                        cell.setCellValue(col);
+                    }
+                    cellCount++;
+                }
+                rowIndex++;
+            }
+        } catch (OutOfMemoryError er){
+            System.out.println("Failed at row=" + rowIndex + ", objects : " + cellCount);
+        }
+
+        long totalMbAfter = Runtime.getRuntime().totalMemory()/(1024*1024);
+        System.out.println("Memory: " + totalMbAfter + "MB");
+    }
+
+    /**
+     * Generate a spreadsheet whose cell values are all numbers.
+     * Rows are added until an OutOfMemoryError is thrown; the row index and the
+     * number of cells reached at that point are printed to standard output,
+     * together with the JVM total memory before and after the run.
+     * <p>
+     * Compared to {@link #mixedSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)},
+     * this method sets no string values and, hence, does not involve the Shared Strings Table.
+     * </p>
+     *
+     * @param wb        the workbook to write to
+     * @param numCols   the number of columns in a row
+     */
+    public static void numberSpreadsheet(Workbook wb, int numCols){
+        System.out.println("Testing " + wb.getClass().getName());
+        long totalMbBefore = Runtime.getRuntime().totalMemory()/(1024*1024);
+        System.out.println("Memory: " + totalMbBefore + "MB");
+
+        // Kept outside the try block so the catch clause can report progress.
+        int rowIndex = 0;
+        int cellCount = 0;
+        try {
+            Sheet sheet = wb.createSheet();
+            // Deliberately unbounded: the loop ends only when memory runs out.
+            while (true) {
+                Row row = sheet.createRow(rowIndex);
+                for (int col = 0; col < numCols; col++) {
+                    // every cell holds its own column index as a numeric value
+                    row.createCell(col).setCellValue(col);
+                    cellCount++;
+                }
+                rowIndex++;
+            }
+        } catch (OutOfMemoryError er){
+            System.out.println("Failed at row=" + rowIndex + ", objects : " + cellCount);
+        }
+
+        long totalMbAfter = Runtime.getRuntime().totalMemory()/(1024*1024);
+        System.out.println("Memory: " + totalMbAfter + "MB");
+    }
+
+    /**
+     * Build a worksheet directly from low-level OOXML XmlBeans until OutOfMemoryError.
+     * The generated content mirrors
+     * {@link #numberSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)}:
+     * every cell is typed as numeric and holds its column index as text value.
+     * The row index and the number of cells reached are printed when memory runs out.
+     *
+     * @param numCols   the number of columns in a row
+     */
+    public static void xmlBeans(int numCols) {
+        // Kept outside the try block so the catch clause can report progress.
+        int rowIndex = 0;
+        int cellCount = 0;
+        long totalMbBefore = Runtime.getRuntime().totalMemory() / (1024 * 1024);
+        System.out.println("Memory: " + totalMbBefore + "MB");
+
+        // All rows and cells below are created as children of this worksheet bean.
+        CTWorksheet worksheet = CTWorksheet.Factory.newInstance();
+        CTSheetData sheetData = worksheet.addNewSheetData();
+        try {
+            // Deliberately unbounded: the loop ends only when memory runs out.
+            while (true) {
+                CTRow ctRow = sheetData.addNewRow();
+                ctRow.setR(rowIndex);
+                for (int col = 0; col < numCols; col++) {
+                    CTCell ctCell = ctRow.addNewC();
+                    ctCell.setT(STCellType.N);
+                    ctCell.setV(Integer.toString(col));
+                    cellCount++;
+                }
+                rowIndex++;
+            }
+        } catch (OutOfMemoryError er) {
+            System.out.println("Failed at row=" + rowIndex + ", objects: " + cellCount);
+        }
+
+        long totalMbAfter = Runtime.getRuntime().totalMemory() / (1024 * 1024);
+        System.out.println("Memory: " + totalMbAfter + "MB");
+    }
+
+    /**
+     * Generate detached (parentless) Xml beans until OutOfMemoryError.
+     * Each bean is created stand-alone through {@code CTRow.Factory.newInstance()}
+     * and kept in a list; the number of beans created and the JVM total memory
+     * are printed once memory runs out.
+     *
+     * @see #testXmlAttached()
+     */
+    public void testXmlDetached(){
+        // Typed list (instead of a raw List) that keeps every created bean reachable.
+        List<CTRow> rows = new ArrayList<CTRow>();
+        int i = 0;
+        try {
+            // Deliberately unbounded: the loop ends only when memory runs out.
+            for(;;){
+                //create a standalone CTRow bean
+                CTRow r = CTRow.Factory.newInstance();
+                r.setR(++i);
+                rows.add(r);
+            }
+        } catch (OutOfMemoryError er) {
+            System.out.println("Failed at row=" + i);
+        }
+        System.out.println("Memory: " + Runtime.getRuntime().totalMemory() / (1024 * 1024) + "MB");
+    }
+
+    /**
+     * Generate attached (having a parent bean) Xml beans until OutOfMemoryError.
+     * Each row is created through {@code addNewRow()} on a single parent
+     * {@code CTSheetData} bean rather than stand-alone. This variant is intended to
+     * show that attached beans are much more memory-efficient than the detached
+     * ones produced by {@link #testXmlDetached()}.
+     *
+     * @see #testXmlDetached()
+     */
+    public void testXmlAttached(){
+        // Typed list (instead of a raw List); mirrors the bookkeeping done in testXmlDetached().
+        List<CTRow> rows = new ArrayList<CTRow>();
+        int i = 0;
+        //top-level element in sheet.xml
+        CTWorksheet sh = CTWorksheet.Factory.newInstance();
+        CTSheetData data = sh.addNewSheetData();
+        try {
+            // Deliberately unbounded: the loop ends only when memory runs out.
+            for(;;){
+                //create CTRow attached to the parent object
+                CTRow r = data.addNewRow();
+                r.setR(++i);
+                rows.add(r);
+            }
+        } catch (OutOfMemoryError er) {
+            System.out.println("Failed at row=" + i);
+        }
+        System.out.println("Memory: " + Runtime.getRuntime().totalMemory() / (1024 * 1024) + "MB");
+    }
+
+    /**
+     * Runs the mixed (numbers and strings) scenario against a new HSSFWorkbook.
+     * Delegates to {@link #mixedSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)};
+     * the numbers-only scenario is covered separately by {@link #testNumberHSSF()}.
+     */
+    public void testMixedHSSF(){
+        mixedSpreadsheet(new HSSFWorkbook(), NUM_COLUMNS);
+    }
+
+    /**
+     * Runs the mixed (numbers and strings) scenario against a new XSSFWorkbook.
+     * Delegates to {@link #mixedSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)};
+     * the numbers-only scenario is covered separately by {@link #testNumberXSSF()}.
+     */
+    public void testMixedXSSF(){
+        mixedSpreadsheet(new XSSFWorkbook(), NUM_COLUMNS);
+    }
+
+    /**
+     * Runs the numbers-only scenario,
+     * {@link #numberSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)},
+     * against a new HSSFWorkbook with NUM_COLUMNS columns per row.
+     */
+    public void testNumberHSSF(){
+        Workbook wb = new HSSFWorkbook();
+        numberSpreadsheet(wb, NUM_COLUMNS);
+    }
+
+    /**
+     * Runs the numbers-only scenario,
+     * {@link #numberSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)},
+     * against a new XSSFWorkbook with NUM_COLUMNS columns per row.
+     */
+    public void testNumberXSSF(){
+        Workbook wb = new XSSFWorkbook();
+        numberSpreadsheet(wb, NUM_COLUMNS);
+    }
+
+}
\ No newline at end of file