memory usage optimization in XSSF - avoid creating parentless xml beans

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@885429 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2009-11-30 14:09:03 +00:00
parent baa4640ec9
commit 6216c511a9
8 changed files with 260 additions and 74 deletions

View File

@ -34,6 +34,7 @@
<changes> <changes>
<release version="3.6-beta1" date="2009-??-??"> <release version="3.6-beta1" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="add"> memory usage optimization in XSSF - avoid creating parentless xml beans</action>
<action dev="POI-DEVELOPERS" type="fix">47188 - avoid corruption of workbook when adding cell comments </action> <action dev="POI-DEVELOPERS" type="fix">47188 - avoid corruption of workbook when adding cell comments </action>
<action dev="POI-DEVELOPERS" type="fix">48106 - improved work with cell comments in XSSF</action> <action dev="POI-DEVELOPERS" type="fix">48106 - improved work with cell comments in XSSF</action>
<action dev="POI-DEVELOPERS" type="add">Add support for creating SummaryInformation and DocumentSummaryInformation properties <action dev="POI-DEVELOPERS" type="add">Add support for creating SummaryInformation and DocumentSummaryInformation properties

View File

@ -84,8 +84,12 @@ public class SharedStringsTable extends POIXMLDocumentPart {
*/ */
private int uniqueCount; private int uniqueCount;
private SstDocument _sstDoc;
public SharedStringsTable() { public SharedStringsTable() {
super(); super();
_sstDoc = SstDocument.Factory.newInstance();
_sstDoc.addNewSst();
} }
public SharedStringsTable(PackagePart part, PackageRelationship rel) throws IOException { public SharedStringsTable(PackagePart part, PackageRelationship rel) throws IOException {
@ -102,7 +106,8 @@ public class SharedStringsTable extends POIXMLDocumentPart {
public void readFrom(InputStream is) throws IOException { public void readFrom(InputStream is) throws IOException {
try { try {
int cnt = 0; int cnt = 0;
CTSst sst = SstDocument.Factory.parse(is).getSst(); _sstDoc = SstDocument.Factory.parse(is);
CTSst sst = _sstDoc.getSst();
count = (int)sst.getCount(); count = (int)sst.getCount();
uniqueCount = (int)sst.getUniqueCount(); uniqueCount = (int)sst.getUniqueCount();
for (CTRst st : sst.getSiArray()) { for (CTRst st : sst.getSiArray()) {
@ -163,10 +168,14 @@ public class SharedStringsTable extends POIXMLDocumentPart {
if (stmap.containsKey(s)) { if (stmap.containsKey(s)) {
return stmap.get(s); return stmap.get(s);
} }
uniqueCount++; uniqueCount++;
//create a CTRst bean attached to this SstDocument and copy the argument CTRst into it
CTRst newSt = _sstDoc.getSst().addNewSi();
newSt.set(st);
int idx = strings.size(); int idx = strings.size();
stmap.put(s, idx); stmap.put(s, idx);
strings.add(st); strings.add(newSt);
return idx; return idx;
} }
/** /**
@ -188,14 +197,11 @@ public class SharedStringsTable extends POIXMLDocumentPart {
XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS); XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS);
//re-create the sst table every time saving a workbook //re-create the sst table every time saving a workbook
SstDocument doc = SstDocument.Factory.newInstance(); CTSst sst = _sstDoc.getSst();
CTSst sst = doc.addNewSst();
sst.setCount(count); sst.setCount(count);
sst.setUniqueCount(uniqueCount); sst.setUniqueCount(uniqueCount);
CTRst[] ctr = strings.toArray(new CTRst[strings.size()]); _sstDoc.save(out, options);
sst.setSiArray(ctr);
doc.save(out, options);
} }
@Override @Override

View File

@ -35,7 +35,7 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
* High level representation of a row of a spreadsheet. * High level representation of a row of a spreadsheet.
*/ */
public class XSSFRow implements Row, Comparable<XSSFRow> { public class XSSFRow implements Row, Comparable<XSSFRow> {
private static final POILogger logger = POILogFactory.getLogger(XSSFRow.class); private static final POILogger _logger = POILogFactory.getLogger(XSSFRow.class);
/** /**
* the xml bean containing all cell definitions for this row * the xml bean containing all cell definitions for this row
@ -46,7 +46,7 @@ public class XSSFRow implements Row, Comparable<XSSFRow> {
* Cells of this row keyed by their column indexes. * Cells of this row keyed by their column indexes.
* The TreeMap ensures that the cells are ordered by columnIndex in the ascending order. * The TreeMap ensures that the cells are ordered by columnIndex in the ascending order.
*/ */
private final TreeMap<Integer, Cell> _cells; private final TreeMap<Integer, XSSFCell> _cells;
/** /**
* the parent sheet * the parent sheet
@ -62,7 +62,7 @@ public class XSSFRow implements Row, Comparable<XSSFRow> {
protected XSSFRow(CTRow row, XSSFSheet sheet) { protected XSSFRow(CTRow row, XSSFSheet sheet) {
_row = row; _row = row;
_sheet = sheet; _sheet = sheet;
_cells = new TreeMap<Integer, Cell>(); _cells = new TreeMap<Integer, XSSFCell>();
for (CTCell c : row.getCArray()) { for (CTCell c : row.getCArray()) {
XSSFCell cell = new XSSFCell(this, c); XSSFCell cell = new XSSFCell(this, c);
_cells.put(cell.getColumnIndex(), cell); _cells.put(cell.getColumnIndex(), cell);
@ -91,7 +91,7 @@ public class XSSFRow implements Row, Comparable<XSSFRow> {
* @return an iterator over cells in this row. * @return an iterator over cells in this row.
*/ */
public Iterator<Cell> cellIterator() { public Iterator<Cell> cellIterator() {
return _cells.values().iterator(); return (Iterator<Cell>)(Iterator<? extends Cell>)_cells.values().iterator();
} }
/** /**
@ -160,8 +160,15 @@ public class XSSFRow implements Row, Comparable<XSSFRow> {
* @see Cell#CELL_TYPE_STRING * @see Cell#CELL_TYPE_STRING
*/ */
public XSSFCell createCell(int columnIndex, int type) { public XSSFCell createCell(int columnIndex, int type) {
CTCell ctcell = CTCell.Factory.newInstance(); CTCell ctCell;
XSSFCell xcell = new XSSFCell(this, ctcell); XSSFCell prev = _cells.get(columnIndex);
if(prev != null){
ctCell = prev.getCTCell();
ctCell.set(CTCell.Factory.newInstance());
} else {
ctCell = _row.addNewC();
}
XSSFCell xcell = new XSSFCell(this, ctCell);
xcell.setCellNum(columnIndex); xcell.setCellNum(columnIndex);
if (type != Cell.CELL_TYPE_BLANK) { if (type != Cell.CELL_TYPE_BLANK) {
xcell.setCellType(type); xcell.setCellType(type);

View File

@ -69,8 +69,10 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
public class XSSFSheet extends POIXMLDocumentPart implements Sheet { public class XSSFSheet extends POIXMLDocumentPart implements Sheet {
private static final POILogger logger = POILogFactory.getLogger(XSSFSheet.class); private static final POILogger logger = POILogFactory.getLogger(XSSFSheet.class);
//TODO make the two variables below private!
protected CTSheet sheet; protected CTSheet sheet;
protected CTWorksheet worksheet; protected CTWorksheet worksheet;
private TreeMap<Integer, XSSFRow> rows; private TreeMap<Integer, XSSFRow> rows;
private List<XSSFHyperlink> hyperlinks; private List<XSSFHyperlink> hyperlinks;
private ColumnHelper columnHelper; private ColumnHelper columnHelper;
@ -422,10 +424,17 @@ public class XSSFSheet extends POIXMLDocumentPart implements Sheet {
* @see #removeRow(org.apache.poi.ss.usermodel.Row) * @see #removeRow(org.apache.poi.ss.usermodel.Row)
*/ */
public XSSFRow createRow(int rownum) { public XSSFRow createRow(int rownum) {
CTRow ctRow = CTRow.Factory.newInstance(); CTRow ctRow;
XSSFRow prev = rows.get(rownum);
if(prev != null){
ctRow = prev.getCTRow();
ctRow.set(CTRow.Factory.newInstance());
} else {
ctRow = worksheet.getSheetData().addNewRow();
}
XSSFRow r = new XSSFRow(ctRow, this); XSSFRow r = new XSSFRow(ctRow, this);
r.setRowNum(rownum); r.setRowNum(rownum);
rows.put(r.getRowNum(), r); rows.put(rownum, r);
return r; return r;
} }

View File

@ -148,6 +148,12 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable<X
private static POILogger logger = POILogFactory.getLogger(XSSFWorkbook.class); private static POILogger logger = POILogFactory.getLogger(XSSFWorkbook.class);
/**
* cached instance of XSSFCreationHelper for this workbook
* @see #getCreationHelper()
*/
private XSSFCreationHelper _creationHelper;
/** /**
* Create a new SpreadsheetML workbook. * Create a new SpreadsheetML workbook.
*/ */
@ -1191,7 +1197,8 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable<X
* classes of the various instances for XSSF. * classes of the various instances for XSSF.
*/ */
public XSSFCreationHelper getCreationHelper() { public XSSFCreationHelper getCreationHelper() {
return new XSSFCreationHelper(this); if(_creationHelper == null) _creationHelper = new XSSFCreationHelper(this);
return _creationHelper;
} }
/** /**

View File

@ -1,56 +0,0 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.usermodel.helpers;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRElt;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRPrElt;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
/**
 * Helper for converting between the usermodel {@link RichTextString} and the
 * low-level {@link CTRst} xml bean.
 * <p>
 * Only the plain text is converted in either direction; font and formatting
 * runs are not translated yet (see the TODO notes below).
 * </p>
 */
public class RichTextStringHelper {

    /**
     * Copies the plain text of <code>string</code> into the <code>t</code>
     * element of <code>text</code>. Formatting is not transferred.
     *
     * @param string the source rich text string
     * @param text   the xml bean that receives the text
     */
    public static void convertToRst(RichTextString string, CTRst text) {
        // TODO: implement RichTextString to Rst conversion
        text.setT(string.getString());
    }

    /**
     * Builds a rich text string from the given xml bean.
     * <p>
     * If the bean carries a plain <code>t</code> element its value is used as is;
     * otherwise the text of all <code>r</code> runs is concatenated in document order.
     * </p>
     *
     * @param ctText the xml bean to read, may be <code>null</code>
     * @return the converted string; an empty string if <code>ctText</code> is <code>null</code>
     */
    public static RichTextString convertFromRst(CTRst ctText) {
        if (ctText == null) {
            return new XSSFRichTextString("");
        }
        if (ctText.getT() != null) {
            return new XSSFRichTextString(ctText.getT());
        }

        // Grab all the text, run by run
        StringBuilder t = new StringBuilder();
        for (CTRElt r : ctText.getRArray()) {
            String runText = r.getT();
            // a run without text must not contribute the literal "null"
            if (runText != null) {
                t.append(runText);
            }
        }

        // TODO: implement Rst/RPr to RichTextString conversion.
        // Formatting info comes from each run's rPr element, which is absent (null)
        // for unformatted runs. The previous placeholder loop dereferenced it
        // unconditionally and discarded the result, so it could only fail with a
        // NullPointerException; it has been removed until the conversion is implemented.
        return new XSSFRichTextString(t.toString());
    }
}

View File

@ -877,8 +877,23 @@ public class TestXSSFSheet extends BaseTestSheet {
assertNotNull(comment1); assertNotNull(comment1);
assertEquals("/xl/comments1.xml", comment1.getPackageRelationship().getTargetURI().toString()); assertEquals("/xl/comments1.xml", comment1.getPackageRelationship().getTargetURI().toString());
assertSame(comment1, sheet1.getCommentsTable(true)); assertSame(comment1, sheet1.getCommentsTable(true));
} }
/**
 * Verifies that rows and cells are created as children of the sheet's xml bean
 * and that re-creating a row reuses the existing bean instead of appending a new one.
 */
public void testCreateRow() {
    final XSSFWorkbook wb = new XSSFWorkbook();
    final XSSFSheet sh = wb.createSheet();
    final CTWorksheet ctWorksheet = sh.getCTWorksheet();

    // a fresh sheet holds no row beans
    assertEquals(0, ctWorksheet.getSheetData().sizeOfRowArray());

    // one row with two cells shows up as one row bean holding two cell beans
    final XSSFRow firstRow = sh.createRow(1);
    firstRow.createCell(1);
    firstRow.createCell(2);
    assertEquals(1, ctWorksheet.getSheetData().sizeOfRowArray());
    assertEquals(2, ctWorksheet.getSheetData().getRowArray(0).sizeOfCArray());

    // creating the same row again must NOT add another row bean to the parent ...
    sh.createRow(1);
    assertEquals(1, ctWorksheet.getSheetData().sizeOfRowArray());
    // ... and the cells that existed before are gone
    assertEquals(0, ctWorksheet.getSheetData().getRowArray(0).sizeOfCArray());
}
} }

View File

@ -0,0 +1,197 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.util;
import junit.framework.TestCase;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.util.CellReference;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
import java.util.List;
import java.util.ArrayList;
/**
* Mixed utilities for testing memory usage in XSSF
*
* @author Yegor Kozlov
*/
/**
 * Mixed utilities for testing memory usage in XSSF.
 * <p>
 * Every scenario keeps allocating until the JVM throws {@link OutOfMemoryError}
 * and then reports how far it got, so these are diagnostic runs rather than
 * assertions-based unit tests.
 * </p>
 *
 * @author Yegor Kozlov
 */
public class MemoryUsage extends TestCase {
    private static final int NUM_COLUMNS = 255;

    /**
     * Prints the JVM's total memory (as reported by {@link Runtime#totalMemory()})
     * in megabytes.
     */
    private static void printMemoryUsage() {
        System.out.println("Memory: " + Runtime.getRuntime().totalMemory() / (1024 * 1024) + "MB");
    }

    /**
     * Generate a spreadsheet until OutOfMemoryError
     * <p>
     * cells in even columns are numbers, cells in odd columns are strings
     * </p>
     *
     * @param wb the workbook to write to
     * @param numCols the number of columns in a row
     */
    public static void mixedSpreadsheet(Workbook wb, int numCols) {
        System.out.println("Testing " + wb.getClass().getName());
        printMemoryUsage();
        int i = 0, cnt = 0;
        try {
            Sheet sh = wb.createSheet();
            for (i = 0; ; i++) {
                Row row = sh.createRow(i);
                for (int j = 0; j < numCols; j++) {
                    Cell cell = row.createCell(j);
                    if (j % 2 == 0) {
                        cell.setCellValue(j);
                    } else {
                        // CellReference takes (row, column): i is the row index, j the column index
                        cell.setCellValue(new CellReference(i, j).formatAsString());
                    }
                    cnt++;
                }
            }
        } catch (OutOfMemoryError er) {
            System.out.println("Failed at row=" + i + ", objects : " + cnt);
        }
        printMemoryUsage();
    }

    /**
     * Generate a spreadsheet whose cell values are all numbers.
     * The data is generated until OutOfMemoryError.
     * <p>
     * as compared to {@link #mixedSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)},
     * this method does not set string values and, hence, does not involve the Shared Strings Table.
     * </p>
     *
     * @param wb the workbook to write to
     * @param numCols the number of columns in a row
     */
    public static void numberSpreadsheet(Workbook wb, int numCols) {
        System.out.println("Testing " + wb.getClass().getName());
        printMemoryUsage();
        int i = 0, cnt = 0;
        try {
            Sheet sh = wb.createSheet();
            for (i = 0; ; i++) {
                Row row = sh.createRow(i);
                for (int j = 0; j < numCols; j++) {
                    Cell cell = row.createCell(j);
                    cell.setCellValue(j);
                    cnt++;
                }
            }
        } catch (OutOfMemoryError er) {
            System.out.println("Failed at row=" + i + ", objects : " + cnt);
        }
        printMemoryUsage();
    }

    /**
     * Generate a spreadsheet until OutOfMemoryError using low-level OOXML XmlBeans.
     * Similar to {@link #numberSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)}
     *
     * @param numCols the number of columns in a row
     */
    public static void xmlBeans(int numCols) {
        int i = 0, cnt = 0;
        printMemoryUsage();

        CTWorksheet sh = CTWorksheet.Factory.newInstance();
        CTSheetData data = sh.addNewSheetData();
        try {
            for (i = 0; ; i++) {
                CTRow row = data.addNewRow();
                row.setR(i);
                for (int j = 0; j < numCols; j++) {
                    CTCell cell = row.addNewC();
                    cell.setT(STCellType.N);
                    cell.setV(String.valueOf(j));
                    cnt++;
                }
            }
        } catch (OutOfMemoryError er) {
            System.out.println("Failed at row=" + i + ", objects: " + cnt);
        }
        printMemoryUsage();
    }

    /**
     * Generate detached (parentless) Xml beans until OutOfMemoryError
     *
     * @see #testXmlAttached()
     */
    public void testXmlDetached() {
        List<CTRow> rows = new ArrayList<CTRow>();
        int i = 0;
        try {
            for (;;) {
                //create a standalone CTRow bean
                CTRow r = CTRow.Factory.newInstance();
                r.setR(++i);
                rows.add(r);
            }
        } catch (OutOfMemoryError er) {
            System.out.println("Failed at row=" + i);
        }
        printMemoryUsage();
    }

    /**
     * Generate attached (having a parent bean) Xml beans until OutOfMemoryError.
     * This is MUCH more memory-efficient than {@link #testXmlDetached()}
     *
     * @see #testXmlDetached()
     */
    public void testXmlAttached() {
        List<CTRow> rows = new ArrayList<CTRow>();
        int i = 0;
        //top-level element in sheet.xml
        CTWorksheet sh = CTWorksheet.Factory.newInstance();
        CTSheetData data = sh.addNewSheetData();
        try {
            for (;;) {
                //create CTRow attached to the parent object
                CTRow r = data.addNewRow();
                r.setR(++i);
                rows.add(r);
            }
        } catch (OutOfMemoryError er) {
            System.out.println("Failed at row=" + i);
        }
        printMemoryUsage();
    }

    /** Mixed numbers/strings scenario against the binary (HSSF) implementation. */
    public void testMixedHSSF() {
        mixedSpreadsheet(new HSSFWorkbook(), NUM_COLUMNS);
    }

    /** Mixed numbers/strings scenario against the OOXML (XSSF) implementation. */
    public void testMixedXSSF() {
        mixedSpreadsheet(new XSSFWorkbook(), NUM_COLUMNS);
    }

    /** Numbers-only scenario against the binary (HSSF) implementation. */
    public void testNumberHSSF() {
        numberSpreadsheet(new HSSFWorkbook(), NUM_COLUMNS);
    }

    /** Numbers-only scenario against the OOXML (XSSF) implementation. */
    public void testNumberXSSF() {
        numberSpreadsheet(new XSSFWorkbook(), NUM_COLUMNS);
    }
}