code improvements to RecordFactoryInputStream

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@801850 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2009-08-07 00:21:00 +00:00
parent acc07e2d6b
commit 755b86af67
3 changed files with 252 additions and 268 deletions

View File

@ -31,7 +31,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* processWorkbookEvents along with a request.
*
* This will cause your file to be processed a record at a time. Each record with
* a static id matching one that you have registed in your HSSFRequest will be passed
* a static id matching one that you have registered in your HSSFRequest will be passed
* to your associated HSSFListener.
*
* @see org.apache.poi.hssf.dev.EFHSSF
@ -39,13 +39,10 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* @author Andrew C. Oliver (acoliver at apache dot org)
* @author Carey Sublette (careysub@earthling.net)
*/
public class HSSFEventFactory
{
public class HSSFEventFactory {
/** Creates a new instance of HSSFEventFactory */
public HSSFEventFactory()
{
public HSSFEventFactory() {
// no instance fields
}
/**
@ -54,10 +51,7 @@ public class HSSFEventFactory
* @param req an Instance of HSSFRequest which has your registered listeners
* @param fs a POIFS filesystem containing your workbook
*/
public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
throws IOException
{
public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs) throws IOException {
InputStream in = fs.createDocumentInputStream("Workbook");
processEvents(req, in);
@ -70,10 +64,8 @@ public class HSSFEventFactory
* @param fs a POIFS filesystem containing your workbook
* @return numeric user-specified result code.
*/
public short abortableProcessWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
throws IOException, HSSFUserException
{
throws IOException, HSSFUserException {
InputStream in = fs.createDocumentInputStream("Workbook");
return abortableProcessEvents(req, in);
}
@ -89,16 +81,12 @@ public class HSSFEventFactory
* @param req an Instance of HSSFRequest which has your registered listeners
* @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
*/
public void processEvents(HSSFRequest req, InputStream in)
throws IOException
{
try
{
public void processEvents(HSSFRequest req, InputStream in) {
try {
genericProcessEvents(req, new RecordInputStream(in));
} catch (HSSFUserException hue) {
/* If an HSSFUserException is thrown, ignore it. */
}
catch (HSSFUserException hue)
{/*If an HSSFUserException user exception is thrown, ignore it.*/ }
}
@ -110,10 +98,8 @@ public class HSSFEventFactory
* @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
* @return numeric user-specified result code.
*/
public short abortableProcessEvents(HSSFRequest req, InputStream in)
throws IOException, HSSFUserException
{
throws HSSFUserException {
return genericProcessEvents(req, new RecordInputStream(in));
}
@ -125,25 +111,22 @@ public class HSSFEventFactory
* @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
* @return numeric user-specified result code.
*/
protected short genericProcessEvents(HSSFRequest req, RecordInputStream in)
throws IOException, HSSFUserException
{
boolean going = true;
throws HSSFUserException {
short userCode = 0;
Record r = null;
// Create a new RecordStream and use that
RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in);
RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in, false);
// Process each record as they come in
while(going) {
r = recordStream.nextRecord();
if(r != null) {
while(true) {
Record r = recordStream.nextRecord();
if(r == null) {
break;
}
userCode = req.processRecord(r);
if (userCode != 0) break;
} else {
going = false;
if (userCode != 0) {
break;
}
}

View File

@ -369,8 +369,7 @@ public final class RecordFactory {
public static List<Record> createRecords(InputStream in) throws RecordFormatException {
List<Record> records = new ArrayList<Record>(NUM_RECORDS);
RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in));
recStream.setIncludeContinueRecords(true);
RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in), true);
Record record;
while ((record = recStream.nextRecord())!=null) {

View File

@ -19,10 +19,6 @@ package org.apache.poi.hssf.record;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/**
* A stream based way to get at complete records, with
* as low a memory footprint as possible.
@ -34,84 +30,44 @@ import java.util.List;
* them, but this does allow for a "pull" style of coding.
*/
public class RecordFactoryInputStream {
private final RecordInputStream recStream;
private final RecordInputStream _recStream;
private final boolean _shouldIncludeContinueRecords;
/**
* Have we returned all the records there are?
* Temporarily stores a group of {@link NumberRecord}s. This is used when the most
* recently read underlying record is a {@link MulRKRecord}
*/
private boolean complete = false;
private NumberRecord[] _multipleNumberRecords;
/**
* Sometimes we end up with a bunch of
* records. When we do, these should
* be returned before the next normal
* record processing occurs (i.e. before
* we check for continue records and
* return rec)
* Index of the next entry to return from the multiple number record group; used to help iterate over it
*/
private final LinkedList bonusRecords = new LinkedList();
private int _multipleNumberRecordIndex = -1;
/**
* The most recent record that we gave to the user
*/
private Record lastRecord = null;
private Record _lastRecord = null;
/**
* The most recent DrawingRecord seen
*/
private DrawingRecord lastDrawingRecord = new DrawingRecord();
private DrawingRecord _lastDrawingRecord = new DrawingRecord();
private int bofDepth = 0;
private int _bofDepth;
private boolean lastRecordWasEOFLevelZero = false;
private boolean _lastRecordWasEOFLevelZero;
private boolean includeContinueRecords = false;
public RecordFactoryInputStream(RecordInputStream inp) {
recStream = inp;
}
/**
* Returns the next (complete) record from the
* stream, or null if there are no more.
* @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose
* {@link ContinueRecord}s should be skipped (this is sometimes useful in event based
* processing).
*/
public Record nextRecord() {
Record r = null;
public RecordFactoryInputStream(RecordInputStream inp, boolean shouldIncludeContinueRecords) {
_recStream = inp;
_shouldIncludeContinueRecords = shouldIncludeContinueRecords;
// Loop until we get something
while (r == null && !complete) {
// Are there any bonus records that we need to
// return?
r = getBonusRecord();
// If not, ask for the next real record
if (r == null) {
r = getNextRecord();
}
}
// All done
return r;
}
/**
* If there are any "bonus" records, that should
* be returned before processing new ones,
* grabs the next and returns it.
* If not, returns null;
*/
private Record getBonusRecord() {
if (!bonusRecords.isEmpty()) {
return (Record) bonusRecords.removeFirst();
}
return null;
}
/**
* Returns the next available record, or null if
* this pass didn't return a record that's
* suitable for returning (eg was a continue record).
*/
private Record getNextRecord() {
/*
* How to recognise end of stream?
* In the best case, the underlying input stream (in) ends just after the last EOF record
@ -129,29 +85,88 @@ public class RecordFactoryInputStream {
* record might follow any EOF record. So we also need to keep track of the bof/eof
* nesting level.
*/
_bofDepth=0;
_lastRecordWasEOFLevelZero = false;
}
if (recStream.hasNextRecord()) {
// Grab our next record
recStream.nextRecord();
if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) {
// Normally InputStream (in) contains only zero padding after this point
complete = true;
/**
* Returns the next (complete) record from the
* stream, or null if there are no more.
*/
public Record nextRecord() {
Record r;
r = getNextMultipleNumberRecord();
if (r != null) {
// found a NumberRecord (expanded from a recent MULRK record)
return r;
}
while (true) {
if (!_recStream.hasNextRecord()) {
// _recStream is exhausted
return null;
}
Record record = RecordFactory.createSingleRecord(recStream);
lastRecordWasEOFLevelZero = false;
// step underlying RecordInputStream to the next record
_recStream.nextRecord();
if (_lastRecordWasEOFLevelZero) {
// Potential place for ending the workbook stream
// Check that the next record is not BOFRecord(0x0809)
// Normally the input stream contains only zero padding after the last EOFRecord,
// but bug 46987 suggests that the padding may be garbage.
// This code relies on the padding bytes not starting with BOFRecord.sid
if (_recStream.getSid() != BOFRecord.sid) {
return null;
}
// else - another sheet substream starting here
}
r = readNextRecord();
if (r == null) {
// some record types may get skipped (e.g. DBCellRecord and ContinueRecord)
continue;
}
return r;
}
}
/**
 * Hands out the pending {@link NumberRecord}s one at a time. The pending group is the
 * array stored in <code>_multipleNumberRecords</code>, and
 * <code>_multipleNumberRecordIndex</code> is the position of the next element to return.
 * Once the index reaches the end of the array, both fields are reset (array to
 * <code>null</code>, index to <code>-1</code>).
 *
 * @return the next pending {@link NumberRecord}, or <code>null</code> if no group is
 * pending or the pending group has been fully consumed.
 */
private NumberRecord getNextMultipleNumberRecord() {
    NumberRecord[] pending = _multipleNumberRecords;
    if (pending == null) {
        // no expanded group waiting to be returned
        return null;
    }
    int pos = _multipleNumberRecordIndex;
    if (pos >= pending.length) {
        // group fully consumed - clear the iteration state
        _multipleNumberRecordIndex = -1;
        _multipleNumberRecords = null;
        return null;
    }
    // read the element first, then advance, so a failed read leaves the index untouched
    NumberRecord next = pending[pos];
    _multipleNumberRecordIndex = pos + 1;
    return next;
}
/**
* @return the next available record, or <code>null</code> if
* this pass didn't return a record that's
* suitable for returning (eg was a continue record).
*/
private Record readNextRecord() {
Record record = RecordFactory.createSingleRecord(_recStream);
_lastRecordWasEOFLevelZero = false;
if (record instanceof BOFRecord) {
bofDepth++;
_bofDepth++;
return record;
}
if (record instanceof EOFRecord) {
bofDepth--;
if (bofDepth < 1) {
lastRecordWasEOFLevelZero = true;
_bofDepth--;
if (_bofDepth < 1) {
_lastRecordWasEOFLevelZero = true;
}
return record;
@ -169,68 +184,55 @@ public class RecordFactoryInputStream {
if (record instanceof MulRKRecord) {
NumberRecord[] records = RecordFactory.convertRKRecords((MulRKRecord) record);
List<NumberRecord> list = Arrays.asList(records);
bonusRecords.addAll(list.subList(1, list.size()));
_multipleNumberRecords = records;
_multipleNumberRecordIndex = 1;
return records[0];
}
if (record.getSid() == DrawingGroupRecord.sid
&& lastRecord instanceof DrawingGroupRecord) {
DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) lastRecord;
&& _lastRecord instanceof DrawingGroupRecord) {
DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) _lastRecord;
lastDGRecord.join((AbstractEscherHolderRecord) record);
return null;
} else if (record.getSid() == ContinueRecord.sid) {
}
if (record.getSid() == ContinueRecord.sid) {
ContinueRecord contRec = (ContinueRecord) record;
if (lastRecord instanceof ObjRecord || lastRecord instanceof TextObjectRecord) {
if (_lastRecord instanceof ObjRecord || _lastRecord instanceof TextObjectRecord) {
// Drawing records have a very strange continue behaviour.
//There can actually be OBJ records mixed between the continues.
lastDrawingRecord.processContinueRecord(contRec.getData());
_lastDrawingRecord.processContinueRecord(contRec.getData());
//we must remember the position of the continue record.
//in the serialization procedure the original structure of records must be preserved
if (includeContinueRecords) {
if (_shouldIncludeContinueRecords) {
return record;
} else {
}
return null;
}
} else if (lastRecord instanceof DrawingGroupRecord) {
((DrawingGroupRecord) lastRecord).processContinueRecord(contRec.getData());
if (_lastRecord instanceof DrawingGroupRecord) {
((DrawingGroupRecord) _lastRecord).processContinueRecord(contRec.getData());
return null;
} else if (lastRecord instanceof DrawingRecord) {
((DrawingRecord) lastRecord).processContinueRecord(contRec.getData());
}
if (_lastRecord instanceof DrawingRecord) {
((DrawingRecord) _lastRecord).processContinueRecord(contRec.getData());
return null;
} else if (lastRecord instanceof UnknownRecord) {
}
if (_lastRecord instanceof UnknownRecord) {
//Gracefully handle records that we don't know about,
//that happen to be continued
return record;
} else if (lastRecord instanceof EOFRecord) {
}
if (_lastRecord instanceof EOFRecord) {
// This is really odd, but excel still sometimes
// outputs a file like this all the same
return record;
} else {
}
throw new RecordFormatException("Unhandled Continue Record");
}
} else {
lastRecord = record;
_lastRecord = record;
if (record instanceof DrawingRecord) {
lastDrawingRecord = (DrawingRecord) record;
_lastDrawingRecord = (DrawingRecord) record;
}
return record;
}
} else {
// No more records
complete = true;
return null;
}
}
/**
* Return or not ContinueRecord in nextRecord
*/
public void setIncludeContinueRecords(boolean includeContinueRecords) {
this.includeContinueRecords = includeContinueRecords;
}
}