From e11c26c29b196a4daa81c2ccbdcc4ce0d34c757c Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 1 Jul 2011 16:16:55 +0000 Subject: [PATCH] Apply patch from bug #51460 (with some related generics tweaks) - Improve HSSF performance when loading very long rows, by switching the CellValue array to an iterator git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1141970 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 3 +- .../apache/poi/hssf/model/InternalSheet.java | 35 ++++++++--- .../apache/poi/hssf/model/RecordStream.java | 8 +-- .../aggregates/RowRecordsAggregate.java | 10 ++++ .../aggregates/ValueRecordsAggregate.java | 60 ++++++++++++++++++- .../apache/poi/hssf/usermodel/HSSFSheet.java | 6 +- 6 files changed, 105 insertions(+), 17 deletions(-) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 69572844b..293ba59dc 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,7 +34,8 @@ - 51444 - Prevent corrupted output when saving files created by LibreOffice 3.3 + 51460 - Improve HSSF performance when loading very long rows, by switching the CellValue array to an iterator + 51444 - Prevent corrupted output when saving files created by LibreOffice 3.3 51422 - Support using RecalcIdRecord to trigger a full formula recalculation on load 50474 - Example demonstrating how to update Excel workbook embedded in a WordprocessingML document 51431 - Avoid IndexOutOfBoundException when removing freeze panes in XSSF diff --git a/src/java/org/apache/poi/hssf/model/InternalSheet.java b/src/java/org/apache/poi/hssf/model/InternalSheet.java index d11548081..0ad87845d 100644 --- a/src/java/org/apache/poi/hssf/model/InternalSheet.java +++ b/src/java/org/apache/poi/hssf/model/InternalSheet.java @@ -360,13 +360,13 @@ public final class InternalSheet { private static final class RecordCloner implements RecordVisitor { - private final 
List _destList; + private final List _destList; - public RecordCloner(List destList) { + public RecordCloner(List destList) { _destList = destList; } public void visitRecord(Record r) { - _destList.add((RecordBase)r.clone()); + _destList.add((Record)r.clone()); } } @@ -378,7 +378,7 @@ public final class InternalSheet { * belongs to a sheet. */ public InternalSheet cloneSheet() { - List clonedRecords = new ArrayList(_records.size()); + List clonedRecords = new ArrayList(_records.size()); for (int i = 0; i < _records.size(); i++) { RecordBase rb = _records.get(i); if (rb instanceof RecordAggregate) { @@ -723,10 +723,10 @@ public final class InternalSheet { public void removeRow(RowRecord row) { _rowsAggregate.removeRow(row); } - + /** - * get the NEXT value record (from LOC). The first record that is a value record - * (starting at LOC) will be returned. + * Get all the value records (from LOC). Records will be returned from the first + * record (starting at LOC) which is a value record. * *

* This method is "loc" sensitive. Meaning you need to set LOC to where you @@ -735,8 +735,27 @@ public final class InternalSheet { * at what this sets it to. For this method, set loc to dimsloc to start with, * subsequent calls will return values in (physical) sequence or NULL when you get to the end. * - * @return CellValueRecordInterface representing the next value record or NULL if there are no more + * @return Iterator of CellValueRecordInterface representing the value records */ + public Iterator getCellValueIterator(){ + return _rowsAggregate.getCellValueIterator(); + } + + /** + * Get all the value records (from LOC). Records will be returned from the first + * record (starting at LOC) which is a value record. + * + *

+ * This method is "loc" sensitive. Meaning you need to set LOC to where you + * want it to start searching. If you don't know do this: setLoc(getDimsLoc). + * When adding several rows you can just start at the last one by leaving loc + * at what this sets it to. For this method, set loc to dimsloc to start with, + * subsequent calls will return values in (physical) sequence or NULL when you get to the end. + * + * @return Array of CellValueRecordInterface representing the remaining value records + * @deprecated use {@link #getCellValueIterator()} instead + */ + @Deprecated public CellValueRecordInterface[] getValueRecords() { return _rowsAggregate.getValueRecords(); } diff --git a/src/java/org/apache/poi/hssf/model/RecordStream.java b/src/java/org/apache/poi/hssf/model/RecordStream.java index 8869a9cf0..3fbf96764 100644 --- a/src/java/org/apache/poi/hssf/model/RecordStream.java +++ b/src/java/org/apache/poi/hssf/model/RecordStream.java @@ -27,7 +27,7 @@ import org.apache.poi.hssf.record.Record; */ public final class RecordStream { - private final List _list; + private final List _list; private int _nextIndex; private int _countRead; private final int _endIx; @@ -35,14 +35,14 @@ public final class RecordStream { /** * Creates a RecordStream bounded by startIndex and endIndex */ - public RecordStream(List inputList, int startIndex, int endIx) { + public RecordStream(List inputList, int startIndex, int endIx) { _list = inputList; _nextIndex = startIndex; _endIx = endIx; _countRead = 0; } - public RecordStream(List records, int startIx) { + public RecordStream(List records, int startIx) { this(records, startIx, records.size()); } @@ -61,7 +61,7 @@ public final class RecordStream { /** * @return the {@link Class} of the next Record. null if this stream is exhausted. 
*/ - public Class peekNextClass() { + public Class peekNextClass() { if(!hasNext()) { return null; } diff --git a/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java b/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java index 50092cc9a..e4e92cf70 100644 --- a/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java +++ b/src/java/org/apache/poi/hssf/record/aggregates/RowRecordsAggregate.java @@ -447,7 +447,17 @@ public final class RowRecordsAggregate extends RecordAggregate { return startHidden; } + + /** + * Returns an iterator for the cell values + */ + public Iterator getCellValueIterator() { + return _valuesAgg.iterator(); + } + /** + * @deprecated use {@link #getCellValueIterator()} instead + */ public CellValueRecordInterface[] getValueRecords() { return _valuesAgg.getValueRecords(); } diff --git a/src/java/org/apache/poi/hssf/record/aggregates/ValueRecordsAggregate.java b/src/java/org/apache/poi/hssf/record/aggregates/ValueRecordsAggregate.java index 04d1ec0c3..70ad5227f 100644 --- a/src/java/org/apache/poi/hssf/record/aggregates/ValueRecordsAggregate.java +++ b/src/java/org/apache/poi/hssf/record/aggregates/ValueRecordsAggregate.java @@ -18,6 +18,7 @@ package org.apache.poi.hssf.record.aggregates; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import org.apache.poi.hssf.model.RecordStream; @@ -40,7 +41,7 @@ import org.apache.poi.ss.formula.ptg.Ptg; * @author Glen Stampoultzis (glens at apache.org) * @author Jason Height (jheight at chariot dot net dot au) */ -public final class ValueRecordsAggregate { +public final class ValueRecordsAggregate implements Iterable { private static final int MAX_ROW_INDEX = 0XFFFF; private static final int INDEX_NOT_SET = -1; private int firstcell = INDEX_NOT_SET; @@ -301,10 +302,67 @@ public final class ValueRecordsAggregate { } } + /** + * iterator for CellValueRecordInterface + */ + class ValueIterator implements Iterator { + + int 
curRowIndex = 0, curColIndex = -1; + int nextRowIndex = 0, nextColIndex = -1; + + public ValueIterator() { + getNextPos(); + } + + void getNextPos() { + if (nextRowIndex >= records.length) + return; // no next already + + while (nextRowIndex < records.length) { + ++nextColIndex; + if (records[nextRowIndex] == null || nextColIndex >= records[nextRowIndex].length) { + ++nextRowIndex; + nextColIndex = -1; + continue; + } + + if (records[nextRowIndex][nextColIndex] != null) + return; // next cell found + } + // no next found + } + + public boolean hasNext() { + return nextRowIndex < records.length; + } + + public CellValueRecordInterface next() { + if (!hasNext()) + throw new IndexOutOfBoundsException("iterator has no next"); + + curRowIndex = nextRowIndex; + curColIndex = nextColIndex; + final CellValueRecordInterface ret = records[curRowIndex][curColIndex]; + getNextPos(); + return ret; + } + + public void remove() { + records[curRowIndex][curColIndex] = null; + } + } + + /** value iterator */ + public Iterator iterator() { + return new ValueIterator(); + } + /** * Gets all the cell records contained in this aggregate. * Note {@link BlankRecord}s appear separate (not in {@link MulBlankRecord}s). 
+ * @deprecated use {@link #iterator()} instead */ + @Deprecated public CellValueRecordInterface[] getValueRecords() { List temp = new ArrayList(); diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java b/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java index 77eada349..03d337493 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java @@ -142,7 +142,7 @@ public final class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet { row = sheet.getNextRow(); } - CellValueRecordInterface[] cvals = sheet.getValueRecords(); + Iterator iter = sheet.getCellValueIterator(); long timestart = System.currentTimeMillis(); if (log.check( POILogger.DEBUG )) @@ -151,8 +151,8 @@ public final class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet { HSSFRow lastrow = null; // Add every cell to its row - for (int i = 0; i < cvals.length; i++) { - CellValueRecordInterface cval = cvals[i]; + while (iter.hasNext()) { + CellValueRecordInterface cval = iter.next(); long cellstart = System.currentTimeMillis(); HSSFRow hrow = lastrow;