Convert HSSFEventFactory to use the new HSSFRecordStream, which returns fully-formed HSSFRecords. HSSFRecordStream also allows for pull-style eventusermodel processing

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@617167 13f79535-47bb-0310-9956-ffa450edef68
Nick Burch 2008-01-31 17:30:16 +00:00
parent 22d8a53710
commit 21b5c6d8cd
5 changed files with 272 additions and 105 deletions
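In the pull style this commit enables, the caller drives the loop itself instead of registering an HSSFListener. A minimal sketch of that usage (illustrative only, not part of this commit; the class name PullExample and file name sample.xls are placeholders, "Workbook" is the standard BIFF8 stream name that processWorkbookEvents also uses):

    import java.io.FileInputStream;

    import org.apache.poi.hssf.eventusermodel.HSSFRecordStream;
    import org.apache.poi.hssf.record.Record;
    import org.apache.poi.hssf.record.RecordInputStream;
    import org.apache.poi.poifs.filesystem.POIFSFileSystem;

    public class PullExample {
        public static void main(String[] args) throws Exception {
            // Open the OLE2 container and wrap its workbook stream for record-level reading
            POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream("sample.xls"));
            RecordInputStream rin = new RecordInputStream(fs.createDocumentInputStream("Workbook"));

            // Pull fully-formed records one at a time; continue records are already merged in
            HSSFRecordStream rs = new HSSFRecordStream(rin);
            Record r;
            while ((r = rs.nextRecord()) != null) {
                System.out.println("sid=" + r.getSid() + " " + r.getClass().getName());
            }
        }
    }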


@@ -36,8 +36,7 @@
     <!-- Don't forget to update status.xml too! -->
     <release version="3.1-beta1" date="2008-??-??">
-        <!-- an action tag is requered to keep Forrest quite. -->
-        <action dev="POI-DEVELOPERS" type="fix">...</action>
+        <action dev="POI-DEVELOPERS" type="add">Support for a completed Record based "pull" stream, via org.apache.poi.hssf.eventusermodel.HSSFRecordStream, to complement the existing "push" Event User Model listener stuff</action>
     </release>
     <release version="3.0.2-FINAL" date="2008-02-04">
         <action dev="POI-DEVELOPERS" type="fix">44297 - IntPtg must operate with unsigned short. Reading signed short results in incorrect formula calculation</action>


@@ -33,6 +33,7 @@
     <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.1-beta1" date="2008-??-??">
+            <action dev="POI-DEVELOPERS" type="add">Support for a completed Record based "pull" stream, via org.apache.poi.hssf.eventusermodel.HSSFRecordStream, to complement the existing "push" Event User Model listener stuff</action>
         </release>
         <release version="3.0.2-FINAL" date="2008-02-04">
             <action dev="POI-DEVELOPERS" type="fix">44297 - IntPtg must operate with unsigned short. Reading signed short results in incorrect formula calculation</action>


@@ -129,113 +129,25 @@ public class HSSFEventFactory
     protected short genericProcessEvents(HSSFRequest req, RecordInputStream in)
         throws IOException, HSSFUserException
     {
+        boolean going = true;
         short userCode = 0;
-        short sid = 0;
-
-        process:
-        {
-            Record rec = null;
-            Record lastRec = null;
-            DrawingRecord lastDrawingRecord = new DrawingRecord();
-
-            while (in.hasNextRecord())
-            {
-                in.nextRecord();
-                sid = in.getSid();;
-
-                //
-                // for some reasons we have to make the workbook to be at least 4096 bytes
-                // but if we have such workbook we fill the end of it with zeros (many zeros)
-                //
-                // it is not good:
-                // if the length( all zero records ) % 4 = 1
-                // e.g.: any zero record would be readed as 4 bytes at once ( 2 - id and 2 - size ).
-                // And the last 1 byte will be readed WRONG ( the id must be 2 bytes )
-                //
-                // So we should better to check if the sid is zero and not to read more data
-                // The zero sid shows us that rest of the stream data is a fake to make workbook
-                // certain size
-                //
-                if ( sid == 0 )
-                    break;
-
-                // If we had a last record, and this one
-                // isn't a continue record, then pass
-                // it on to the listener
-                if ((rec != null) && (sid != ContinueRecord.sid))
-                {
-                    userCode = req.processRecord(rec);
-                    if (userCode != 0) break process;
-                }
-
-                // If this record isn't a continue record,
-                // then build it up
-                if (sid != ContinueRecord.sid)
-                {
-                    //System.out.println("creating "+sid);
-                    Record[] recs = RecordFactory.createRecord(in);
-
-                    // We know that the multiple record situations
-                    // don't contain continue records, so just
-                    // pass those on to the listener now
-                    if (recs.length > 1) {
-                        for (int k = 0; k < (recs.length - 1); k++) {
-                            userCode = req.processRecord(
-                                recs[ k ]);
-                            if (userCode != 0) break process;
-                        }
-                    }
-
-                    // Regardless of the number we created, always hold
-                    // onto the last record to be processed on the next
-                    // loop, in case it has any continue records
-                    rec = recs[ recs.length - 1 ];
-                }
-                else {
-                    // Normally, ContinueRecords are handled internally
-                    // However, in a few cases, there is a gap between a record at
-                    // its Continue, so we have to handle them specially
-                    // This logic is much like in RecordFactory.createRecords()
-                    Record[] recs = RecordFactory.createRecord(in);
-                    ContinueRecord crec = (ContinueRecord)recs[0];
-                    if((lastRec instanceof ObjRecord) || (lastRec instanceof TextObjectRecord)) {
-                        // You can have Obj records between a DrawingRecord
-                        // and its continue!
-                        lastDrawingRecord.processContinueRecord( crec.getData() );
-                        // Trigger them on the drawing record, now it's complete
-                        rec = lastDrawingRecord;
-                    }
-                    else if((lastRec instanceof DrawingGroupRecord)) {
-                        ((DrawingGroupRecord)lastRec).processContinueRecord(crec.getData());
-                        // Trigger them on the drawing record, now it's complete
-                        rec = lastRec;
-                    }
-                    else {
-                        if (rec instanceof UnknownRecord) {
-                            ;//silently skip records we don't know about
-                        } else {
-                            throw new RecordFormatException("Records should handle ContinueRecord internally. Should not see this exception");
-                        }
-                    }
-                }
-
-                // Update our tracking of the last record
-                lastRec = rec;
-                if(rec instanceof DrawingRecord) {
-                    lastDrawingRecord = (DrawingRecord)rec;
-                }
-            } // main while loop
-
-            // Process the last record in the stream, if
-            // it's still outstanding
-            if (rec != null) {
-                userCode = req.processRecord(rec);
-                if (userCode != 0) break process;
-            }
-        }
+        Record r = null;
+
+        // Create a new RecordStream and use that
+        HSSFRecordStream recordStream = new HSSFRecordStream(in);
+
+        // Process each record as they come in
+        while(going) {
+            r = recordStream.nextRecord();
+            if(r != null) {
+                userCode = req.processRecord(r);
+                if (userCode != 0) break;
+            } else {
+                going = false;
+            }
+        }
+
+        // All done, return our last code
         return userCode;
     }
 }
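The push-style path is unchanged for callers: HSSFEventFactory still feeds an HSSFRequest/HSSFListener pair, it just builds the records via HSSFRecordStream internally now. A rough caller-side sketch (illustrative only; the class name PushExample, the listener body and the file name are placeholders):

    import java.io.FileInputStream;

    import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
    import org.apache.poi.hssf.eventusermodel.HSSFListener;
    import org.apache.poi.hssf.eventusermodel.HSSFRequest;
    import org.apache.poi.hssf.record.Record;
    import org.apache.poi.poifs.filesystem.POIFSFileSystem;

    public class PushExample {
        public static void main(String[] args) throws Exception {
            POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream("sample.xls"));

            // Register a listener for every record type
            HSSFRequest req = new HSSFRequest();
            req.addListenerForAllRecords(new HSSFListener() {
                public void processRecord(Record record) {
                    // Each record arrives fully formed, exactly as HSSFRecordStream produced it
                    System.out.println("Got " + record.getClass().getName());
                }
            });

            // Push all workbook records through the listener
            new HSSFEventFactory().processWorkbookEvents(req, fs);
        }
    }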


@@ -0,0 +1,234 @@
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */
package org.apache.poi.hssf.eventusermodel;

import java.util.Vector;

import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.hssf.record.DrawingGroupRecord;
import org.apache.poi.hssf.record.DrawingRecord;
import org.apache.poi.hssf.record.ObjRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RecordFactory;
import org.apache.poi.hssf.record.RecordFormatException;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.hssf.record.TextObjectRecord;
import org.apache.poi.hssf.record.UnknownRecord;

/**
 * A stream based way to get at complete records, with
 * as low a memory footprint as possible.
 * This handles reading from a RecordInputStream, turning
 * the data into full records, processing continue records
 * etc.
 * Most users should use {@link HSSFEventFactory} /
 * {@link HSSFListener} and have new records pushed to
 * them, but this does allow for a "pull" style of coding.
 */
public class HSSFRecordStream {
    private RecordInputStream in;

    /** Have we run out of records on the stream? */
    private boolean hitEOS = false;
    /** Have we returned all the records there are? */
    private boolean complete = false;

    /**
     * Sometimes we end up with a bunch of
     * records. When we do, these should
     * be returned before the next normal
     * record processing occurs (i.e. before
     * we check for continue records and
     * return rec)
     */
    private Vector bonusRecords = null;

    /**
     * The next record to return, which may need to have its
     * continue records passed to it before we do
     */
    private Record rec = null;
    /**
     * The most recent record that we gave to the user
     */
    private Record lastRec = null;
    /**
     * The most recent DrawingRecord seen
     */
    private DrawingRecord lastDrawingRecord = new DrawingRecord();

    public HSSFRecordStream(RecordInputStream inp) {
        this.in = inp;
    }

    /**
     * Returns the next (complete) record from the
     * stream, or null if there are no more.
     */
    public Record nextRecord() {
        Record r = null;

        // Loop until we get something
        while(r == null && !complete) {
            // Are there any bonus records that we need to
            // return?
            r = getBonusRecord();

            // If not, ask for the next real record
            if(r == null) {
                r = getNextRecord();
            }
        }

        // All done
        return r;
    }

    /**
     * If there are any "bonus" records, that should
     * be returned before processing new ones,
     * grabs the next and returns it.
     * If not, returns null;
     */
    private Record getBonusRecord() {
        if(bonusRecords != null) {
            Record r = (Record)bonusRecords.remove(0);
            if(bonusRecords.size() == 0) {
                bonusRecords = null;
            }
            return r;
        }
        return null;
    }

    /**
     * Returns the next available record, or null if
     * this pass didn't return a record that's
     * suitable for returning (eg was a continue record).
     */
    private Record getNextRecord() {
        Record toReturn = null;

        if(in.hasNextRecord()) {
            // Grab our next record
            in.nextRecord();
            short sid = in.getSid();

            //
            // for some reasons we have to make the workbook to be at least 4096 bytes
            // but if we have such workbook we fill the end of it with zeros (many zeros)
            //
            // it is not good:
            // if the length( all zero records ) % 4 = 1
            // e.g.: any zero record would be readed as 4 bytes at once ( 2 - id and 2 - size ).
            // And the last 1 byte will be readed WRONG ( the id must be 2 bytes )
            //
            // So we should better to check if the sid is zero and not to read more data
            // The zero sid shows us that rest of the stream data is a fake to make workbook
            // certain size
            //
            if ( sid == 0 )
                return null;

            // If we had a last record, and this one
            // isn't a continue record, then pass
            // it on to the listener
            if ((rec != null) && (sid != ContinueRecord.sid))
            {
                // This last record ought to be returned
                toReturn = rec;
            }

            // If this record isn't a continue record,
            // then build it up
            if (sid != ContinueRecord.sid)
            {
                //System.out.println("creating "+sid);
                Record[] recs = RecordFactory.createRecord(in);

                // We know that the multiple record situations
                // don't contain continue records, so just
                // pass those on to the listener now
                if (recs.length > 1) {
                    bonusRecords = new Vector(recs.length-1);
                    for (int k = 0; k < (recs.length - 1); k++) {
                        bonusRecords.add(recs[k]);
                    }
                }

                // Regardless of the number we created, always hold
                // onto the last record to be processed on the next
                // loop, in case it has any continue records
                rec = recs[ recs.length - 1 ];
                // Don't return it just yet though, as we probably have
                // a record from the last round to return
            }
            else {
                // Normally, ContinueRecords are handled internally
                // However, in a few cases, there is a gap between a record at
                // its Continue, so we have to handle them specially
                // This logic is much like in RecordFactory.createRecords()
                Record[] recs = RecordFactory.createRecord(in);
                ContinueRecord crec = (ContinueRecord)recs[0];
                if((lastRec instanceof ObjRecord) || (lastRec instanceof TextObjectRecord)) {
                    // You can have Obj records between a DrawingRecord
                    // and its continue!
                    lastDrawingRecord.processContinueRecord( crec.getData() );
                    // Trigger them on the drawing record, now it's complete
                    rec = lastDrawingRecord;
                }
                else if((lastRec instanceof DrawingGroupRecord)) {
                    ((DrawingGroupRecord)lastRec).processContinueRecord(crec.getData());
                    // Trigger them on the drawing record, now it's complete
                    rec = lastRec;
                }
                else {
                    if (rec instanceof UnknownRecord) {
                        ;//silently skip records we don't know about
                    } else {
                        throw new RecordFormatException("Records should handle ContinueRecord internally. Should not see this exception");
                    }
                }
            }

            // Update our tracking of the last record
            lastRec = rec;
            if(rec instanceof DrawingRecord) {
                lastDrawingRecord = (DrawingRecord)rec;
            }
        } else {
            // No more records
            hitEOS = true;
        }

        // If we've hit the end-of-stream, then
        // finish off the last record and be done
        if(hitEOS) {
            complete = true;

            // Return the last record if there was
            // one, otherwise null
            if(rec != null) {
                toReturn = rec;
                rec = null;
            }
        }

        return toReturn;
    }
}
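From the caller's side, the contract that the updated tests below lean on is that nextRecord() never hands back a raw ContinueRecord (continue data is folded into its owning record) and that the stream is delivered right to its end, including the trailing EOF record. A small illustrative fragment of that contract (the class and method names are placeholders):

    import org.apache.poi.hssf.eventusermodel.HSSFRecordStream;
    import org.apache.poi.hssf.record.ContinueRecord;
    import org.apache.poi.hssf.record.EOFRecord;
    import org.apache.poi.hssf.record.Record;

    public class PullContractSketch {
        /** Drains a record stream, checking the behaviour the new tests assert. */
        public static void drain(HSSFRecordStream rs) {
            Record last = null;
            for (Record r = rs.nextRecord(); r != null; r = rs.nextRecord()) {
                // Continue records should already have been merged into their parents
                if (r instanceof ContinueRecord) {
                    throw new IllegalStateException("ContinueRecord should never surface");
                }
                last = r;
            }
            // For a workbook stream, the final record delivered is the EOF record
            System.out.println("last record is EOF? " + (last instanceof EOFRecord));
        }
    }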


@@ -23,8 +23,13 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.util.ArrayList;
 
+import org.apache.poi.hssf.record.DVALRecord;
+import org.apache.poi.hssf.record.DVRecord;
+import org.apache.poi.hssf.record.EOFRecord;
 import org.apache.poi.hssf.record.Record;
 import org.apache.poi.hssf.record.ContinueRecord;
+import org.apache.poi.hssf.record.SelectionRecord;
+import org.apache.poi.hssf.record.WindowTwoRecord;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
 import junit.framework.TestCase;
@@ -48,7 +53,15 @@ public class TestHSSFEventFactory extends TestCase {
         factory.processWorkbookEvents(req, fs);
 
         // Check we got the records
+        System.out.println("Processed, found " + mockListen.records.size() + " records");
         assertTrue( mockListen.records.size() > 100 );
+
+        // Check that the last few records are as we expect
+        // (Makes sure we don't accidently skip the end ones)
+        int numRec = mockListen.records.size();
+        assertEquals(WindowTwoRecord.class, mockListen.records.get(numRec-3).getClass());
+        assertEquals(SelectionRecord.class, mockListen.records.get(numRec-2).getClass());
+        assertEquals(EOFRecord.class, mockListen.records.get(numRec-1).getClass());
     }
 
     public void testWithCrazyContinueRecords() throws Exception {
@@ -66,6 +79,7 @@ public class TestHSSFEventFactory extends TestCase {
         factory.processWorkbookEvents(req, fs);
 
         // Check we got the records
+        System.out.println("Processed, found " + mockListen.records.size() + " records");
         assertTrue( mockListen.records.size() > 100 );
 
         // And none of them are continue ones
@@ -74,6 +88,13 @@ public class TestHSSFEventFactory extends TestCase {
         for(int i=0; i<r.length; i++) {
             assertFalse( r[i] instanceof ContinueRecord );
         }
+
+        // Check that the last few records are as we expect
+        // (Makes sure we don't accidently skip the end ones)
+        int numRec = mockListen.records.size();
+        assertEquals(DVALRecord.class, mockListen.records.get(numRec-3).getClass());
+        assertEquals(DVRecord.class, mockListen.records.get(numRec-2).getClass());
+        assertEquals(EOFRecord.class, mockListen.records.get(numRec-1).getClass());
     }
 
     /**