diff --git a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
index 2238b39b8..2b392c0e3 100644
--- a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
+++ b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
@@ -31,7 +31,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* processWorkbookEvents along with a request.
*
* This will cause your file to be processed a record at a time. Each record with
- * a static id matching one that you have registed in your HSSFRequest will be passed
+ * a static id matching one that you have registered in your HSSFRequest will be passed
* to your associated HSSFListener.
*
* @see org.apache.poi.hssf.dev.EFHSSF
@@ -39,115 +39,98 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* @author Andrew C. Oliver (acoliver at apache dot org)
* @author Carey Sublette (careysub@earthling.net)
*/
+public class HSSFEventFactory {
+ /** Creates a new instance of HSSFEventFactory */
+ public HSSFEventFactory() {
+ // no instance fields
+ }
-public class HSSFEventFactory
-{
- /** Creates a new instance of HSSFEventFactory */
-
- public HSSFEventFactory()
- {
- }
-
- /**
- * Processes a file into essentially record events.
- *
- * @param req an Instance of HSSFRequest which has your registered listeners
- * @param fs a POIFS filesystem containing your workbook
- */
-
- public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
- throws IOException
- {
- InputStream in = fs.createDocumentInputStream("Workbook");
-
- processEvents(req, in);
- }
-
- /**
+ /**
* Processes a file into essentially record events.
*
- * @param req an Instance of HSSFRequest which has your registered listeners
- * @param fs a POIFS filesystem containing your workbook
- * @return numeric user-specified result code.
+ * @param req an Instance of HSSFRequest which has your registered listeners
+ * @param fs a POIFS filesystem containing your workbook
*/
+ public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs) throws IOException {
+ InputStream in = fs.createDocumentInputStream("Workbook");
+ processEvents(req, in);
+ }
+
+ /**
+ * Processes a file into essentially record events.
+ *
+ * @param req an Instance of HSSFRequest which has your registered listeners
+ * @param fs a POIFS filesystem containing your workbook
+ * @return numeric user-specified result code.
+ */
public short abortableProcessWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
- throws IOException, HSSFUserException
- {
+ throws IOException, HSSFUserException {
InputStream in = fs.createDocumentInputStream("Workbook");
return abortableProcessEvents(req, in);
- }
+ }
- /**
- * Processes a DocumentInputStream into essentially Record events.
- *
- * If an <code>AbortableHSSFListener</code> causes a halt to processing during this call
- * the method will return just as with <code>abortableProcessEvents</code>, but no
- * user code or <code>HSSFUserException</code> will be passed back.
- *
- * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
- * @param req an Instance of HSSFRequest which has your registered listeners
- * @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
- */
-
- public void processEvents(HSSFRequest req, InputStream in)
- throws IOException
- {
- try
- {
+ /**
+ * Processes a DocumentInputStream into essentially Record events.
+ *
+ * If an <code>AbortableHSSFListener</code> causes a halt to processing during this call
+ * the method will return just as with <code>abortableProcessEvents</code>, but no
+ * user code or <code>HSSFUserException</code> will be passed back.
+ *
+ * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
+ * @param req an Instance of HSSFRequest which has your registered listeners
+ * @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
+ */
+ public void processEvents(HSSFRequest req, InputStream in) {
+ try {
genericProcessEvents(req, new RecordInputStream(in));
+ } catch (HSSFUserException hue) {
+ /* If an HSSFUserException is thrown, ignore it: this method reports no user code. */
}
- catch (HSSFUserException hue)
- {/*If an HSSFUserException user exception is thrown, ignore it.*/ }
}
- /**
- * Processes a DocumentInputStream into essentially Record events.
- *
- * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
- * @param req an Instance of HSSFRequest which has your registered listeners
- * @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
- * @return numeric user-specified result code.
- */
-
- public short abortableProcessEvents(HSSFRequest req, InputStream in)
- throws IOException, HSSFUserException
- {
- return genericProcessEvents(req, new RecordInputStream(in));
- }
-
- /**
+ /**
* Processes a DocumentInputStream into essentially Record events.
*
* @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
- * @param req an Instance of HSSFRequest which has your registered listeners
- * @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
- * @return numeric user-specified result code.
+ * @param req an Instance of HSSFRequest which has your registered listeners
+ * @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
+ * @return numeric user-specified result code.
*/
+ public short abortableProcessEvents(HSSFRequest req, InputStream in)
+ throws HSSFUserException {
+ return genericProcessEvents(req, new RecordInputStream(in));
+ }
+ /**
+ * Processes a DocumentInputStream into essentially Record events.
+ *
+ * @see org.apache.poi.poifs.filesystem.POIFSFileSystem#createDocumentInputStream(String)
+ * @param req an Instance of HSSFRequest which has your registered listeners
+ * @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
+ * @return numeric user-specified result code.
+ */
protected short genericProcessEvents(HSSFRequest req, RecordInputStream in)
- throws IOException, HSSFUserException
- {
- boolean going = true;
+ throws HSSFUserException {
short userCode = 0;
- Record r = null;
-
+
// Create a new RecordStream and use that
- RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in);
-
+ RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in, false);
+
// Process each record as they come in
- while(going) {
- r = recordStream.nextRecord();
- if(r != null) {
- userCode = req.processRecord(r);
- if (userCode != 0) break;
- } else {
- going = false;
+ while(true) {
+ Record r = recordStream.nextRecord();
+ if(r == null) {
+ break;
+ }
+ userCode = req.processRecord(r);
+ if (userCode != 0) {
+ break;
}
}
-
+
// All done, return our last code
return userCode;
- }
+ }
}
diff --git a/src/java/org/apache/poi/hssf/record/RecordFactory.java b/src/java/org/apache/poi/hssf/record/RecordFactory.java
index 53213dfc1..f45860667 100644
--- a/src/java/org/apache/poi/hssf/record/RecordFactory.java
+++ b/src/java/org/apache/poi/hssf/record/RecordFactory.java
@@ -369,12 +369,11 @@ public final class RecordFactory {
public static List createRecords(InputStream in) throws RecordFormatException {
List records = new ArrayList(NUM_RECORDS);
- RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in));
- recStream.setIncludeContinueRecords(true);
+ RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in), true);
- Record record;
+ Record record;
while ((record = recStream.nextRecord())!=null) {
- records.add(record);
+ records.add(record);
}
return records;
diff --git a/src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java b/src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java
index 19f2ca45c..541dfd2dc 100755
--- a/src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java
+++ b/src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java
@@ -19,10 +19,6 @@ package org.apache.poi.hssf.record;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
-import java.util.Arrays;
-import java.util.LinkedList;
-import java.util.List;
-
/**
* A stream based way to get at complete records, with
* as low a memory footprint as possible.
@@ -34,203 +30,209 @@ import java.util.List;
* them, but this does allow for a "pull" style of coding.
*/
public class RecordFactoryInputStream {
- private final RecordInputStream recStream;
- /**
- * Have we returned all the records there are?
- */
- private boolean complete = false;
+ private final RecordInputStream _recStream;
+ private final boolean _shouldIncludeContinueRecords;
- /**
- * Sometimes we end up with a bunch of
- * records. When we do, these should
- * be returned before the next normal
- * record processing occurs (i.e. before
- * we check for continue records and
- * return rec)
- */
- private final LinkedList bonusRecords = new LinkedList();
+ /**
+ * Temporarily stores a group of {@link NumberRecord}s. This is used when the most
+ * recently read underlying record is a {@link MulRKRecord}
+ */
+ private NumberRecord[] _multipleNumberRecords;
- /**
- * The most recent record that we gave to the user
- */
- private Record lastRecord = null;
- /**
- * The most recent DrawingRecord seen
- */
- private DrawingRecord lastDrawingRecord = new DrawingRecord();
+ /**
+ * Index of the next entry to return from the stored group of number records; -1 when none is stored
+ */
+ private int _multipleNumberRecordIndex = -1;
- private int bofDepth = 0;
+ /**
+ * The most recent record that we gave to the user
+ */
+ private Record _lastRecord = null;
+ /**
+ * The most recent DrawingRecord seen
+ */
+ private DrawingRecord _lastDrawingRecord = new DrawingRecord();
- private boolean lastRecordWasEOFLevelZero = false;
+ private int _bofDepth;
- private boolean includeContinueRecords = false;
+ private boolean _lastRecordWasEOFLevelZero;
- public RecordFactoryInputStream(RecordInputStream inp) {
- recStream = inp;
- }
- /**
- * Returns the next (complete) record from the
- * stream, or null if there are no more.
- */
- public Record nextRecord() {
- Record r = null;
+ /**
+ * @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose
+ * {@link ContinueRecord}s should be skipped (this is sometimes useful in event based
+ * processing).
+ */
+ public RecordFactoryInputStream(RecordInputStream inp, boolean shouldIncludeContinueRecords) {
+ _recStream = inp;
+ _shouldIncludeContinueRecords = shouldIncludeContinueRecords;
- // Loop until we get something
- while (r == null && !complete) {
- // Are there any bonus records that we need to
- // return?
- r = getBonusRecord();
+ /*
+ * How to recognise end of stream?
+ * In the best case, the underlying input stream (in) ends just after the last EOF record
+ * Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
+ * reliably use zeros for padding and if this were always the case, this code could just
+ * skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
+ * non-zero padding that is read OK by Excel (Excel also fixes the padding).
+ *
+ * So to properly detect the workbook end of stream, this code has to identify the last
+ * EOF record. This is not so easy because the workbook bof+eof pair does not bracket the
+ * whole stream. The worksheets follow the workbook, but it is not easy to tell how many
+ * sheet sub-streams should be present. Hence we are looking for an EOF record that is not
+ * immediately followed by a BOF record. One extra complication is that bof+eof sub-
+ * streams can be nested within worksheet streams and it's not clear in these cases what
+ * record might follow any EOF record. So we also need to keep track of the bof/eof
+ * nesting level.
+ */
+ _bofDepth=0;
+ _lastRecordWasEOFLevelZero = false;
+ }
- // If not, ask for the next real record
- if (r == null) {
- r = getNextRecord();
- }
- }
+ /**
+ * Returns the next (complete) record from the
+ * stream, or null if there are no more.
+ */
+ public Record nextRecord() {
+ Record r;
+ r = getNextMultipleNumberRecord();
+ if (r != null) {
+ // found a NumberRecord (expanded from a recent MULRK record)
+ return r;
+ }
+ while (true) {
+ if (!_recStream.hasNextRecord()) {
+ // recStream is exhausted;
+ return null;
+ }
- // All done
- return r;
- }
+ // step underlying RecordInputStream to the next record
+ _recStream.nextRecord();
- /**
- * If there are any "bonus" records, that should
- * be returned before processing new ones,
- * grabs the next and returns it.
- * If not, returns null;
- */
- private Record getBonusRecord() {
- if (!bonusRecords.isEmpty()) {
- return (Record) bonusRecords.removeFirst();
- }
- return null;
- }
+ if (_lastRecordWasEOFLevelZero) {
+ // Potential place for ending the workbook stream
+ // Check that the next record is not BOFRecord(0x0809)
+ // Normally the input stream contains only zero padding after the last EOFRecord,
+ // but bug 46987 suggests that the padding may be garbage.
+ // This code relies on the padding bytes not starting with BOFRecord.sid
+ if (_recStream.getSid() != BOFRecord.sid) {
+ return null;
+ }
+ // else - another sheet substream starting here
+ }
- /**
- * Returns the next available record, or null if
- * this pass didn't return a record that's
- * suitable for returning (eg was a continue record).
- */
- private Record getNextRecord() {
- /*
- * How to recognise end of stream?
- * In the best case, the underlying input stream (in) ends just after the last EOF record
- * Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
- * reliably use zeros for padding and if this were always the case, this code could just
- * skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
- * non-zero padding that is read OK by Excel (Excel also fixes the padding).
- *
- * So to properly detect the workbook end of stream, this code has to identify the last
- * EOF record. This is not so easy because the worbook bof+eof pair do not bracket the
- * whole stream. The worksheets follow the workbook, but it is not easy to tell how many
- * sheet sub-streams should be present. Hence we are looking for an EOF record that is not
- * immediately followed by a BOF record. One extra complication is that bof+eof sub-
- * streams can be nested within worksheet streams and it's not clear in these cases what
- * record might follow any EOF record. So we also need to keep track of the bof/eof
- * nesting level.
- */
+ r = readNextRecord();
+ if (r == null) {
+ // some record types may get skipped (e.g. DBCellRecord and ContinueRecord)
+ continue;
+ }
+ return r;
+ }
+ }
- if (recStream.hasNextRecord()) {
- // Grab our next record
- recStream.nextRecord();
+ /**
+ * @return the next {@link NumberRecord} from the multiple record group as expanded from
+ * a recently read {@link MulRKRecord}. <code>null</code> if not present.
+ */
+ private NumberRecord getNextMultipleNumberRecord() {
+ if (_multipleNumberRecords != null) {
+ int ix = _multipleNumberRecordIndex;
+ if (ix < _multipleNumberRecords.length) {
+ NumberRecord result = _multipleNumberRecords[ix];
+ _multipleNumberRecordIndex = ix + 1;
+ return result;
+ }
+ _multipleNumberRecordIndex = -1;
+ _multipleNumberRecords = null;
+ }
+ return null;
+ }
- if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) {
- // Normally InputStream (in) contains only zero padding after this point
- complete = true;
- return null;
- }
+ /**
+ * @return the next available record, or <code>null</code> if
+ * this pass didn't return a record that's
+ * suitable for returning (eg was a continue record).
+ */
+ private Record readNextRecord() {
- Record record = RecordFactory.createSingleRecord(recStream);
- lastRecordWasEOFLevelZero = false;
+ Record record = RecordFactory.createSingleRecord(_recStream);
+ _lastRecordWasEOFLevelZero = false;
- if (record instanceof BOFRecord) {
- bofDepth++;
- return record;
- }
+ if (record instanceof BOFRecord) {
+ _bofDepth++;
+ return record;
+ }
- if (record instanceof EOFRecord) {
- bofDepth--;
- if (bofDepth < 1) {
- lastRecordWasEOFLevelZero = true;
- }
+ if (record instanceof EOFRecord) {
+ _bofDepth--;
+ if (_bofDepth < 1) {
+ _lastRecordWasEOFLevelZero = true;
+ }
- return record;
- }
+ return record;
+ }
- if (record instanceof DBCellRecord) {
- // Not needed by POI. Regenerated from scratch by POI when spreadsheet is written
- return null;
- }
+ if (record instanceof DBCellRecord) {
+ // Not needed by POI. Regenerated from scratch by POI when spreadsheet is written
+ return null;
+ }
- if (record instanceof RKRecord) {
- return RecordFactory.convertToNumberRecord((RKRecord) record);
- }
+ if (record instanceof RKRecord) {
+ return RecordFactory.convertToNumberRecord((RKRecord) record);
+ }
- if (record instanceof MulRKRecord) {
- NumberRecord[] records = RecordFactory.convertRKRecords((MulRKRecord) record);
+ if (record instanceof MulRKRecord) {
+ NumberRecord[] records = RecordFactory.convertRKRecords((MulRKRecord) record);
- List list = Arrays.asList(records);
- bonusRecords.addAll(list.subList(1, list.size()));
+ _multipleNumberRecords = records;
+ _multipleNumberRecordIndex = 1;
+ return records[0];
+ }
- return records[0];
- }
+ if (record.getSid() == DrawingGroupRecord.sid
+ && _lastRecord instanceof DrawingGroupRecord) {
+ DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) _lastRecord;
+ lastDGRecord.join((AbstractEscherHolderRecord) record);
+ return null;
+ }
+ if (record.getSid() == ContinueRecord.sid) {
+ ContinueRecord contRec = (ContinueRecord) record;
- if (record.getSid() == DrawingGroupRecord.sid
- && lastRecord instanceof DrawingGroupRecord) {
- DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) lastRecord;
- lastDGRecord.join((AbstractEscherHolderRecord) record);
- return null;
- } else if (record.getSid() == ContinueRecord.sid) {
- ContinueRecord contRec = (ContinueRecord) record;
-
- if (lastRecord instanceof ObjRecord || lastRecord instanceof TextObjectRecord) {
- // Drawing records have a very strange continue behaviour.
- //There can actually be OBJ records mixed between the continues.
- lastDrawingRecord.processContinueRecord(contRec.getData());
- //we must remember the position of the continue record.
- //in the serialization procedure the original structure of records must be preserved
- if (includeContinueRecords) {
- return record;
- } else {
- return null;
- }
- } else if (lastRecord instanceof DrawingGroupRecord) {
- ((DrawingGroupRecord) lastRecord).processContinueRecord(contRec.getData());
- return null;
- } else if (lastRecord instanceof DrawingRecord) {
- ((DrawingRecord) lastRecord).processContinueRecord(contRec.getData());
- return null;
- } else if (lastRecord instanceof UnknownRecord) {
- //Gracefully handle records that we don't know about,
- //that happen to be continued
- return record;
- } else if (lastRecord instanceof EOFRecord) {
- // This is really odd, but excel still sometimes
- // outputs a file like this all the same
- return record;
- } else {
- throw new RecordFormatException("Unhandled Continue Record");
- }
- } else {
- lastRecord = record;
- if (record instanceof DrawingRecord) {
- lastDrawingRecord = (DrawingRecord) record;
- }
-
- return record;
- }
-
- } else {
- // No more records
- complete = true;
- return null;
- }
- }
-
- /**
- * Return or not ContinueRecord in nextRecord
- */
- public void setIncludeContinueRecords(boolean includeContinueRecords) {
- this.includeContinueRecords = includeContinueRecords;
- }
-}
\ No newline at end of file
+ if (_lastRecord instanceof ObjRecord || _lastRecord instanceof TextObjectRecord) {
+ // Drawing records have a very strange continue behaviour.
+ //There can actually be OBJ records mixed between the continues.
+ _lastDrawingRecord.processContinueRecord(contRec.getData());
+ //we must remember the position of the continue record.
+ //in the serialization procedure the original structure of records must be preserved
+ if (_shouldIncludeContinueRecords) {
+ return record;
+ }
+ return null;
+ }
+ if (_lastRecord instanceof DrawingGroupRecord) {
+ ((DrawingGroupRecord) _lastRecord).processContinueRecord(contRec.getData());
+ return null;
+ }
+ if (_lastRecord instanceof DrawingRecord) {
+ ((DrawingRecord) _lastRecord).processContinueRecord(contRec.getData());
+ return null;
+ }
+ if (_lastRecord instanceof UnknownRecord) {
+ //Gracefully handle records that we don't know about,
+ //that happen to be continued
+ return record;
+ }
+ if (_lastRecord instanceof EOFRecord) {
+ // This is really odd, but excel still sometimes
+ // outputs a file like this all the same
+ return record;
+ }
+ throw new RecordFormatException("Unhandled Continue Record");
+ }
+ _lastRecord = record;
+ if (record instanceof DrawingRecord) {
+ _lastDrawingRecord = (DrawingRecord) record;
+ }
+ return record;
+ }
+}