From 6216c511a952b455396576617663be37a11d6060 Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Mon, 30 Nov 2009 14:09:03 +0000 Subject: [PATCH] memory usage optimization in XSSF - avoid creating parentless xml beans git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@885429 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../poi/xssf/model/SharedStringsTable.java | 20 +- .../apache/poi/xssf/usermodel/XSSFRow.java | 19 +- .../apache/poi/xssf/usermodel/XSSFSheet.java | 13 +- .../poi/xssf/usermodel/XSSFWorkbook.java | 9 +- .../helpers/RichTextStringHelper.java | 56 ----- .../poi/xssf/usermodel/TestXSSFSheet.java | 19 +- .../org/apache/poi/xssf/util/MemoryUsage.java | 197 ++++++++++++++++++ 8 files changed, 260 insertions(+), 74 deletions(-) delete mode 100644 src/ooxml/java/org/apache/poi/xssf/usermodel/helpers/RichTextStringHelper.java create mode 100755 src/ooxml/testcases/org/apache/poi/xssf/util/MemoryUsage.java diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 9109c2b37..26d634124 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + memory usage optimization in XSSF - avoid creating parentless xml beans 47188 - avoid corruption of workbook when adding cell comments 48106 - improved work with cell comments in XSSF Add support for creating SummaryInformation and DocumentSummaryInformation properties diff --git a/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java index 8287df55b..ac648c142 100644 --- a/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java +++ b/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java @@ -84,8 +84,12 @@ public class SharedStringsTable extends POIXMLDocumentPart { */ private int uniqueCount; + private SstDocument _sstDoc; + public SharedStringsTable() { super(); + 
_sstDoc = SstDocument.Factory.newInstance(); + _sstDoc.addNewSst(); } public SharedStringsTable(PackagePart part, PackageRelationship rel) throws IOException { @@ -102,7 +106,8 @@ public class SharedStringsTable extends POIXMLDocumentPart { public void readFrom(InputStream is) throws IOException { try { int cnt = 0; - CTSst sst = SstDocument.Factory.parse(is).getSst(); + _sstDoc = SstDocument.Factory.parse(is); + CTSst sst = _sstDoc.getSst(); count = (int)sst.getCount(); uniqueCount = (int)sst.getUniqueCount(); for (CTRst st : sst.getSiArray()) { @@ -163,10 +168,14 @@ public class SharedStringsTable extends POIXMLDocumentPart { if (stmap.containsKey(s)) { return stmap.get(s); } + uniqueCount++; + //create a CTRst bean attached to this SstDocument and copy the argument CTRst into it + CTRst newSt = _sstDoc.getSst().addNewSi(); + newSt.set(st); int idx = strings.size(); stmap.put(s, idx); - strings.add(st); + strings.add(newSt); return idx; } /** @@ -188,14 +197,11 @@ public class SharedStringsTable extends POIXMLDocumentPart { XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS); //re-create the sst table every time saving a workbook - SstDocument doc = SstDocument.Factory.newInstance(); - CTSst sst = doc.addNewSst(); + CTSst sst = _sstDoc.getSst(); sst.setCount(count); sst.setUniqueCount(uniqueCount); - CTRst[] ctr = strings.toArray(new CTRst[strings.size()]); - sst.setSiArray(ctr); - doc.save(out, options); + _sstDoc.save(out, options); } @Override diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRow.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRow.java index d81a69240..91bb3e4c8 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRow.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRow.java @@ -35,7 +35,7 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow; * High level representation of a row of a spreadsheet. 
*/ public class XSSFRow implements Row, Comparable { - private static final POILogger logger = POILogFactory.getLogger(XSSFRow.class); + private static final POILogger _logger = POILogFactory.getLogger(XSSFRow.class); /** * the xml bean containing all cell definitions for this row @@ -46,7 +46,7 @@ public class XSSFRow implements Row, Comparable { * Cells of this row keyed by their column indexes. * The TreeMap ensures that the cells are ordered by columnIndex in the ascending order. */ - private final TreeMap _cells; + private final TreeMap _cells; /** * the parent sheet @@ -62,7 +62,7 @@ public class XSSFRow implements Row, Comparable { protected XSSFRow(CTRow row, XSSFSheet sheet) { _row = row; _sheet = sheet; - _cells = new TreeMap(); + _cells = new TreeMap(); for (CTCell c : row.getCArray()) { XSSFCell cell = new XSSFCell(this, c); _cells.put(cell.getColumnIndex(), cell); @@ -91,7 +91,7 @@ public class XSSFRow implements Row, Comparable { * @return an iterator over cells in this row. 
*/ public Iterator cellIterator() { - return _cells.values().iterator(); + return (Iterator)(Iterator)_cells.values().iterator(); } /** @@ -160,8 +160,15 @@ public class XSSFRow implements Row, Comparable { * @see Cell#CELL_TYPE_STRING */ public XSSFCell createCell(int columnIndex, int type) { - CTCell ctcell = CTCell.Factory.newInstance(); - XSSFCell xcell = new XSSFCell(this, ctcell); + CTCell ctCell; + XSSFCell prev = _cells.get(columnIndex); + if(prev != null){ + ctCell = prev.getCTCell(); + ctCell.set(CTCell.Factory.newInstance()); + } else { + ctCell = _row.addNewC(); + } + XSSFCell xcell = new XSSFCell(this, ctCell); xcell.setCellNum(columnIndex); if (type != Cell.CELL_TYPE_BLANK) { xcell.setCellType(type); diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java index 2de3c16b0..318f2f6dd 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java @@ -69,8 +69,10 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.*; public class XSSFSheet extends POIXMLDocumentPart implements Sheet { private static final POILogger logger = POILogFactory.getLogger(XSSFSheet.class); + //TODO make the two variable below private! 
protected CTSheet sheet; protected CTWorksheet worksheet; + private TreeMap rows; private List hyperlinks; private ColumnHelper columnHelper; @@ -422,10 +424,17 @@ public class XSSFSheet extends POIXMLDocumentPart implements Sheet { * @see #removeRow(org.apache.poi.ss.usermodel.Row) */ public XSSFRow createRow(int rownum) { - CTRow ctRow = CTRow.Factory.newInstance(); + CTRow ctRow; + XSSFRow prev = rows.get(rownum); + if(prev != null){ + ctRow = prev.getCTRow(); + ctRow.set(CTRow.Factory.newInstance()); + } else { + ctRow = worksheet.getSheetData().addNewRow(); + } XSSFRow r = new XSSFRow(ctRow, this); r.setRowNum(rownum); - rows.put(r.getRowNum(), r); + rows.put(rownum, r); return r; } diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java index 1de055db3..9af7515af 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java @@ -148,6 +148,12 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable + * cells in even columns are numbers, cells in odd columns are strings + *

+ * + * @param wb the workbook to write to + * @param numCols the number of columns in a row + */ + public static void mixedSpreadsheet(Workbook wb, int numCols){ + + System.out.println("Testing " + wb.getClass().getName()); + System.out.println("Memory: " + Runtime.getRuntime().totalMemory()/(1024*1024) + "MB"); + int i=0, cnt=0; + try { + Sheet sh = wb.createSheet(); + for(i=0; ; i++){ + Row row = sh.createRow(i); + for(int j=0; j < numCols; j++){ + Cell cell = row.createCell(j); + if(j % 2 == 0) cell.setCellValue(j); + else cell.setCellValue(new CellReference(j, i).formatAsString()); + cnt++; + } + } + } catch (OutOfMemoryError er){ + System.out.println("Failed at row=" + i + ", objects : " + cnt); + } + System.out.println("Memory: " + Runtime.getRuntime().totalMemory()/(1024*1024) + "MB"); + } + + /** + * Generate a spreadsheet whose cell values are all numbers. + * The data is generated until OutOfMemoryError. + *

+ * as compared to {@link #mixedSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)}, + * this method does not set string values and, hence, does not involve the Shared Strings Table. + *

+ * + * @param wb the workbook to write to + * @param numCols the number of columns in a row + */ + public static void numberSpreadsheet(Workbook wb, int numCols){ + + System.out.println("Testing " + wb.getClass().getName()); + System.out.println("Memory: " + Runtime.getRuntime().totalMemory()/(1024*1024) + "MB"); + int i=0, cnt=0; + try { + Sheet sh = wb.createSheet(); + for(i=0; ; i++){ + Row row = sh.createRow(i); + for(int j=0; j < numCols; j++){ + Cell cell = row.createCell(j); + cell.setCellValue(j); + cnt++; + } + } + } catch (OutOfMemoryError er){ + System.out.println("Failed at row=" + i + ", objects : " + cnt); + } + System.out.println("Memory: " + Runtime.getRuntime().totalMemory()/(1024*1024) + "MB"); + } + + /** + * Generate a spreadsheet until OutOfMemoryError using low-level OOXML XmlBeans. + * Similar to {@link #numberSpreadsheet(org.apache.poi.ss.usermodel.Workbook, int)} + * + *

+ * + * @param numCols the number of columns in a row + */ + public static void xmlBeans(int numCols) { + int i = 0, cnt = 0; + System.out.println("Memory: " + Runtime.getRuntime().totalMemory() / (1024 * 1024) + "MB"); + + CTWorksheet sh = CTWorksheet.Factory.newInstance(); + CTSheetData data = sh.addNewSheetData(); + try { + for (i = 0; ; i++) { + CTRow row = data.addNewRow(); + row.setR(i); + for (int j = 0; j < numCols; j++) { + CTCell cell = row.addNewC(); + cell.setT(STCellType.N); + cell.setV(String.valueOf(j)); + cnt++; + } + } + } catch (OutOfMemoryError er) { + System.out.println("Failed at row=" + i + ", objects: " + cnt); + } + System.out.println("Memory: " + Runtime.getRuntime().totalMemory() / (1024 * 1024) + "MB"); + } + + /** + * Generate detached (parentless) Xml beans until OutOfMemoryError + * + * @see #testXmlAttached() + */ + public void testXmlDetached(){ + List rows = new ArrayList(); + int i = 0; + try { + for(;;){ + //create a standalone CTRow bean + CTRow r = CTRow.Factory.newInstance(); + r.setR(++i); + rows.add(r); + } + } catch (OutOfMemoryError er) { + System.out.println("Failed at row=" + i); + } + System.out.println("Memory: " + Runtime.getRuntime().totalMemory() / (1024 * 1024) + "MB"); + } + + /** + * Generate attached (having a parent bean) Xml beans until OutOfMemoryError. 
+ * This is MUCH more memory-efficient than {@link #testXmlDetached()} + * + * @see #testXmlDetached() + */ + public void testXmlAttached(){ + List rows = new ArrayList(); + int i = 0; + //top-level element in sheet.xml + CTWorksheet sh = CTWorksheet.Factory.newInstance(); + CTSheetData data = sh.addNewSheetData(); + try { + for(;;){ + //create CTRow attached to the parent object + CTRow r = data.addNewRow(); + r.setR(++i); + rows.add(r); + } + } catch (OutOfMemoryError er) { + System.out.println("Failed at row=" + i); + } + System.out.println("Memory: " + Runtime.getRuntime().totalMemory() / (1024 * 1024) + "MB"); + } + + public void testMixedHSSF(){ + numberSpreadsheet(new HSSFWorkbook(), NUM_COLUMNS); + } + + public void testMixedXSSF(){ + numberSpreadsheet(new XSSFWorkbook(), NUM_COLUMNS); + } + + public void testNumberHSSF(){ + numberSpreadsheet(new HSSFWorkbook(), NUM_COLUMNS); + } + + public void testNumberXSSF(){ + numberSpreadsheet(new XSSFWorkbook(), NUM_COLUMNS); + } + +} \ No newline at end of file