code improvements to RecordFactoryInputStream
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@801850 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
acc07e2d6b
commit
755b86af67
@ -31,7 +31,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
* processWorkbookEvents along with a request.
|
||||
*
|
||||
* This will cause your file to be processed a record at a time. Each record with
|
||||
* a static id matching one that you have registed in your HSSFRequest will be passed
|
||||
* a static id matching one that you have registered in your HSSFRequest will be passed
|
||||
* to your associated HSSFListener.
|
||||
*
|
||||
* @see org.apache.poi.hssf.dev.EFHSSF
|
||||
@ -39,13 +39,10 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
* @author Andrew C. Oliver (acoliver at apache dot org)
|
||||
* @author Carey Sublette (careysub@earthling.net)
|
||||
*/
|
||||
|
||||
public class HSSFEventFactory
|
||||
{
|
||||
public class HSSFEventFactory {
|
||||
/** Creates a new instance of HSSFEventFactory */
|
||||
|
||||
public HSSFEventFactory()
|
||||
{
|
||||
public HSSFEventFactory() {
|
||||
// no instance fields
|
||||
}
|
||||
|
||||
/**
|
||||
@ -54,10 +51,7 @@ public class HSSFEventFactory
|
||||
* @param req an Instance of HSSFRequest which has your registered listeners
|
||||
* @param fs a POIFS filesystem containing your workbook
|
||||
*/
|
||||
|
||||
public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
|
||||
throws IOException
|
||||
{
|
||||
public void processWorkbookEvents(HSSFRequest req, POIFSFileSystem fs) throws IOException {
|
||||
InputStream in = fs.createDocumentInputStream("Workbook");
|
||||
|
||||
processEvents(req, in);
|
||||
@ -70,10 +64,8 @@ public class HSSFEventFactory
|
||||
* @param fs a POIFS filesystem containing your workbook
|
||||
* @return numeric user-specified result code.
|
||||
*/
|
||||
|
||||
public short abortableProcessWorkbookEvents(HSSFRequest req, POIFSFileSystem fs)
|
||||
throws IOException, HSSFUserException
|
||||
{
|
||||
throws IOException, HSSFUserException {
|
||||
InputStream in = fs.createDocumentInputStream("Workbook");
|
||||
return abortableProcessEvents(req, in);
|
||||
}
|
||||
@ -89,16 +81,12 @@ public class HSSFEventFactory
|
||||
* @param req an Instance of HSSFRequest which has your registered listeners
|
||||
* @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
|
||||
*/
|
||||
|
||||
public void processEvents(HSSFRequest req, InputStream in)
|
||||
throws IOException
|
||||
{
|
||||
try
|
||||
{
|
||||
public void processEvents(HSSFRequest req, InputStream in) {
|
||||
try {
|
||||
genericProcessEvents(req, new RecordInputStream(in));
|
||||
} catch (HSSFUserException hue) {
|
||||
/*If an HSSFUserException user exception is thrown, ignore it.*/
|
||||
}
|
||||
catch (HSSFUserException hue)
|
||||
{/*If an HSSFUserException user exception is thrown, ignore it.*/ }
|
||||
}
|
||||
|
||||
|
||||
@ -110,10 +98,8 @@ public class HSSFEventFactory
|
||||
* @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
|
||||
* @return numeric user-specified result code.
|
||||
*/
|
||||
|
||||
public short abortableProcessEvents(HSSFRequest req, InputStream in)
|
||||
throws IOException, HSSFUserException
|
||||
{
|
||||
throws HSSFUserException {
|
||||
return genericProcessEvents(req, new RecordInputStream(in));
|
||||
}
|
||||
|
||||
@ -125,25 +111,22 @@ public class HSSFEventFactory
|
||||
* @param in a DocumentInputStream obtained from POIFS's POIFSFileSystem object
|
||||
* @return numeric user-specified result code.
|
||||
*/
|
||||
|
||||
protected short genericProcessEvents(HSSFRequest req, RecordInputStream in)
|
||||
throws IOException, HSSFUserException
|
||||
{
|
||||
boolean going = true;
|
||||
throws HSSFUserException {
|
||||
short userCode = 0;
|
||||
Record r = null;
|
||||
|
||||
// Create a new RecordStream and use that
|
||||
RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in);
|
||||
RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in, false);
|
||||
|
||||
// Process each record as they come in
|
||||
while(going) {
|
||||
r = recordStream.nextRecord();
|
||||
if(r != null) {
|
||||
while(true) {
|
||||
Record r = recordStream.nextRecord();
|
||||
if(r == null) {
|
||||
break;
|
||||
}
|
||||
userCode = req.processRecord(r);
|
||||
if (userCode != 0) break;
|
||||
} else {
|
||||
going = false;
|
||||
if (userCode != 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -369,8 +369,7 @@ public final class RecordFactory {
|
||||
public static List<Record> createRecords(InputStream in) throws RecordFormatException {
|
||||
List<Record> records = new ArrayList<Record>(NUM_RECORDS);
|
||||
|
||||
RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in));
|
||||
recStream.setIncludeContinueRecords(true);
|
||||
RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in), true);
|
||||
|
||||
Record record;
|
||||
while ((record = recStream.nextRecord())!=null) {
|
||||
|
@ -19,10 +19,6 @@ package org.apache.poi.hssf.record;
|
||||
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
|
||||
import org.apache.poi.hssf.eventusermodel.HSSFListener;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A stream based way to get at complete records, with
|
||||
* as low a memory footprint as possible.
|
||||
@ -34,84 +30,44 @@ import java.util.List;
|
||||
* them, but this does allow for a "pull" style of coding.
|
||||
*/
|
||||
public class RecordFactoryInputStream {
|
||||
private final RecordInputStream recStream;
|
||||
|
||||
private final RecordInputStream _recStream;
|
||||
private final boolean _shouldIncludeContinueRecords;
|
||||
|
||||
/**
|
||||
* Have we returned all the records there are?
|
||||
* Temporarily stores a group of {@link NumberRecord}s. This is used when the most
|
||||
* recently read underlying record is a {@link MulRKRecord}
|
||||
*/
|
||||
private boolean complete = false;
|
||||
private NumberRecord[] _multipleNumberRecords;
|
||||
|
||||
/**
|
||||
* Sometimes we end up with a bunch of
|
||||
* records. When we do, these should
|
||||
* be returned before the next normal
|
||||
* record processing occurs (i.e. before
|
||||
* we check for continue records and
|
||||
* return rec)
|
||||
* Used to help iterate over multiple number records
|
||||
*/
|
||||
private final LinkedList bonusRecords = new LinkedList();
|
||||
private int _multipleNumberRecordIndex = -1;
|
||||
|
||||
/**
|
||||
* The most recent record that we gave to the user
|
||||
*/
|
||||
private Record lastRecord = null;
|
||||
private Record _lastRecord = null;
|
||||
/**
|
||||
* The most recent DrawingRecord seen
|
||||
*/
|
||||
private DrawingRecord lastDrawingRecord = new DrawingRecord();
|
||||
private DrawingRecord _lastDrawingRecord = new DrawingRecord();
|
||||
|
||||
private int bofDepth = 0;
|
||||
private int _bofDepth;
|
||||
|
||||
private boolean lastRecordWasEOFLevelZero = false;
|
||||
private boolean _lastRecordWasEOFLevelZero;
|
||||
|
||||
private boolean includeContinueRecords = false;
|
||||
|
||||
public RecordFactoryInputStream(RecordInputStream inp) {
|
||||
recStream = inp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next (complete) record from the
|
||||
* stream, or null if there are no more.
|
||||
* @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose
|
||||
* {@link ContinueRecord}s should be skipped (this is sometimes useful in event based
|
||||
* processing).
|
||||
*/
|
||||
public Record nextRecord() {
|
||||
Record r = null;
|
||||
public RecordFactoryInputStream(RecordInputStream inp, boolean shouldIncludeContinueRecords) {
|
||||
_recStream = inp;
|
||||
_shouldIncludeContinueRecords = shouldIncludeContinueRecords;
|
||||
|
||||
// Loop until we get something
|
||||
while (r == null && !complete) {
|
||||
// Are there any bonus records that we need to
|
||||
// return?
|
||||
r = getBonusRecord();
|
||||
|
||||
// If not, ask for the next real record
|
||||
if (r == null) {
|
||||
r = getNextRecord();
|
||||
}
|
||||
}
|
||||
|
||||
// All done
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* If there are any "bonus" records, that should
|
||||
* be returned before processing new ones,
|
||||
* grabs the next and returns it.
|
||||
* If not, returns null;
|
||||
*/
|
||||
private Record getBonusRecord() {
|
||||
if (!bonusRecords.isEmpty()) {
|
||||
return (Record) bonusRecords.removeFirst();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next available record, or null if
|
||||
* this pass didn't return a record that's
|
||||
* suitable for returning (eg was a continue record).
|
||||
*/
|
||||
private Record getNextRecord() {
|
||||
/*
|
||||
* How to recognise end of stream?
|
||||
* In the best case, the underlying input stream (in) ends just after the last EOF record
|
||||
@ -129,29 +85,88 @@ public class RecordFactoryInputStream {
|
||||
* record might follow any EOF record. So we also need to keep track of the bof/eof
|
||||
* nesting level.
|
||||
*/
|
||||
_bofDepth=0;
|
||||
_lastRecordWasEOFLevelZero = false;
|
||||
}
|
||||
|
||||
if (recStream.hasNextRecord()) {
|
||||
// Grab our next record
|
||||
recStream.nextRecord();
|
||||
|
||||
if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) {
|
||||
// Normally InputStream (in) contains only zero padding after this point
|
||||
complete = true;
|
||||
/**
|
||||
* Returns the next (complete) record from the
|
||||
* stream, or null if there are no more.
|
||||
*/
|
||||
public Record nextRecord() {
|
||||
Record r;
|
||||
r = getNextMultipleNumberRecord();
|
||||
if (r != null) {
|
||||
// found a NumberRecord (expanded from a recent MULRK record)
|
||||
return r;
|
||||
}
|
||||
while (true) {
|
||||
if (!_recStream.hasNextRecord()) {
|
||||
// recStream is exhausted;
|
||||
return null;
|
||||
}
|
||||
|
||||
Record record = RecordFactory.createSingleRecord(recStream);
|
||||
lastRecordWasEOFLevelZero = false;
|
||||
// step underlying RecordInputStream to the next record
|
||||
_recStream.nextRecord();
|
||||
|
||||
if (_lastRecordWasEOFLevelZero) {
|
||||
// Potential place for ending the workbook stream
|
||||
// Check that the next record is not BOFRecord(0x0809)
|
||||
// Normally the input stream contains only zero padding after the last EOFRecord,
|
||||
// but bug 46987 suggests that the padding may be garbage.
|
||||
// This code relies on the padding bytes not starting with BOFRecord.sid
|
||||
if (_recStream.getSid() != BOFRecord.sid) {
|
||||
return null;
|
||||
}
|
||||
// else - another sheet substream starting here
|
||||
}
|
||||
|
||||
r = readNextRecord();
|
||||
if (r == null) {
|
||||
// some record types may get skipped (e.g. DBCellRecord and ContinueRecord)
|
||||
continue;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the next {@link NumberRecord} from the multiple record group as expanded from
|
||||
* a recently read {@link MulRKRecord}. <code>null</code> if not present.
|
||||
*/
|
||||
private NumberRecord getNextMultipleNumberRecord() {
|
||||
if (_multipleNumberRecords != null) {
|
||||
int ix = _multipleNumberRecordIndex;
|
||||
if (ix < _multipleNumberRecords.length) {
|
||||
NumberRecord result = _multipleNumberRecords[ix];
|
||||
_multipleNumberRecordIndex = ix + 1;
|
||||
return result;
|
||||
}
|
||||
_multipleNumberRecordIndex = -1;
|
||||
_multipleNumberRecords = null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the next available record, or <code>null</code> if
|
||||
* this pass didn't return a record that's
|
||||
* suitable for returning (eg was a continue record).
|
||||
*/
|
||||
private Record readNextRecord() {
|
||||
|
||||
Record record = RecordFactory.createSingleRecord(_recStream);
|
||||
_lastRecordWasEOFLevelZero = false;
|
||||
|
||||
if (record instanceof BOFRecord) {
|
||||
bofDepth++;
|
||||
_bofDepth++;
|
||||
return record;
|
||||
}
|
||||
|
||||
if (record instanceof EOFRecord) {
|
||||
bofDepth--;
|
||||
if (bofDepth < 1) {
|
||||
lastRecordWasEOFLevelZero = true;
|
||||
_bofDepth--;
|
||||
if (_bofDepth < 1) {
|
||||
_lastRecordWasEOFLevelZero = true;
|
||||
}
|
||||
|
||||
return record;
|
||||
@ -169,68 +184,55 @@ public class RecordFactoryInputStream {
|
||||
if (record instanceof MulRKRecord) {
|
||||
NumberRecord[] records = RecordFactory.convertRKRecords((MulRKRecord) record);
|
||||
|
||||
List<NumberRecord> list = Arrays.asList(records);
|
||||
bonusRecords.addAll(list.subList(1, list.size()));
|
||||
|
||||
_multipleNumberRecords = records;
|
||||
_multipleNumberRecordIndex = 1;
|
||||
return records[0];
|
||||
}
|
||||
|
||||
if (record.getSid() == DrawingGroupRecord.sid
|
||||
&& lastRecord instanceof DrawingGroupRecord) {
|
||||
DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) lastRecord;
|
||||
&& _lastRecord instanceof DrawingGroupRecord) {
|
||||
DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) _lastRecord;
|
||||
lastDGRecord.join((AbstractEscherHolderRecord) record);
|
||||
return null;
|
||||
} else if (record.getSid() == ContinueRecord.sid) {
|
||||
}
|
||||
if (record.getSid() == ContinueRecord.sid) {
|
||||
ContinueRecord contRec = (ContinueRecord) record;
|
||||
|
||||
if (lastRecord instanceof ObjRecord || lastRecord instanceof TextObjectRecord) {
|
||||
if (_lastRecord instanceof ObjRecord || _lastRecord instanceof TextObjectRecord) {
|
||||
// Drawing records have a very strange continue behaviour.
|
||||
//There can actually be OBJ records mixed between the continues.
|
||||
lastDrawingRecord.processContinueRecord(contRec.getData());
|
||||
_lastDrawingRecord.processContinueRecord(contRec.getData());
|
||||
//we must remember the position of the continue record.
|
||||
//in the serialization procedure the original structure of records must be preserved
|
||||
if (includeContinueRecords) {
|
||||
if (_shouldIncludeContinueRecords) {
|
||||
return record;
|
||||
} else {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
} else if (lastRecord instanceof DrawingGroupRecord) {
|
||||
((DrawingGroupRecord) lastRecord).processContinueRecord(contRec.getData());
|
||||
if (_lastRecord instanceof DrawingGroupRecord) {
|
||||
((DrawingGroupRecord) _lastRecord).processContinueRecord(contRec.getData());
|
||||
return null;
|
||||
} else if (lastRecord instanceof DrawingRecord) {
|
||||
((DrawingRecord) lastRecord).processContinueRecord(contRec.getData());
|
||||
}
|
||||
if (_lastRecord instanceof DrawingRecord) {
|
||||
((DrawingRecord) _lastRecord).processContinueRecord(contRec.getData());
|
||||
return null;
|
||||
} else if (lastRecord instanceof UnknownRecord) {
|
||||
}
|
||||
if (_lastRecord instanceof UnknownRecord) {
|
||||
//Gracefully handle records that we don't know about,
|
||||
//that happen to be continued
|
||||
return record;
|
||||
} else if (lastRecord instanceof EOFRecord) {
|
||||
}
|
||||
if (_lastRecord instanceof EOFRecord) {
|
||||
// This is really odd, but excel still sometimes
|
||||
// outputs a file like this all the same
|
||||
return record;
|
||||
} else {
|
||||
}
|
||||
throw new RecordFormatException("Unhandled Continue Record");
|
||||
}
|
||||
} else {
|
||||
lastRecord = record;
|
||||
_lastRecord = record;
|
||||
if (record instanceof DrawingRecord) {
|
||||
lastDrawingRecord = (DrawingRecord) record;
|
||||
_lastDrawingRecord = (DrawingRecord) record;
|
||||
}
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
} else {
|
||||
// No more records
|
||||
complete = true;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return or not ContinueRecord in nextRecord
|
||||
*/
|
||||
public void setIncludeContinueRecords(boolean includeContinueRecords) {
|
||||
this.includeContinueRecords = includeContinueRecords;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user