refactored HSSFEventFactory to use RecordFactory instead of HSSFRecordStream, see Bugzilla 47448
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@791251 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9c5c51ad29
commit
15ffa9ebda
@ -33,6 +33,7 @@
|
|||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.5-beta7" date="2009-??-??">
|
<release version="3.5-beta7" date="2009-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">47448 - Allow HSSFEventFactory to handle non-zero padding at the end of the workbook stream</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
|
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">47400 - Support for text extraction of footnotes, endnotes and comments in HWPF</action>
|
<action dev="POI-DEVELOPERS" type="add">47400 - Support for text extraction of footnotes, endnotes and comments in HWPF</action>
|
||||||
|
@ -134,7 +134,7 @@ public class HSSFEventFactory
|
|||||||
Record r = null;
|
Record r = null;
|
||||||
|
|
||||||
// Create a new RecordStream and use that
|
// Create a new RecordStream and use that
|
||||||
HSSFRecordStream recordStream = new HSSFRecordStream(in);
|
RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in);
|
||||||
|
|
||||||
// Process each record as they come in
|
// Process each record as they come in
|
||||||
while(going) {
|
while(going) {
|
||||||
|
@ -1,234 +0,0 @@
|
|||||||
/* ====================================================================
|
|
||||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
contributor license agreements. See the NOTICE file distributed with
|
|
||||||
this work for additional information regarding copyright ownership.
|
|
||||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
(the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==================================================================== */
|
|
||||||
package org.apache.poi.hssf.eventusermodel;
|
|
||||||
|
|
||||||
import java.util.Vector;
|
|
||||||
|
|
||||||
import org.apache.poi.hssf.record.ContinueRecord;
|
|
||||||
import org.apache.poi.hssf.record.DrawingGroupRecord;
|
|
||||||
import org.apache.poi.hssf.record.DrawingRecord;
|
|
||||||
import org.apache.poi.hssf.record.ObjRecord;
|
|
||||||
import org.apache.poi.hssf.record.Record;
|
|
||||||
import org.apache.poi.hssf.record.RecordFactory;
|
|
||||||
import org.apache.poi.hssf.record.RecordFormatException;
|
|
||||||
import org.apache.poi.hssf.record.RecordInputStream;
|
|
||||||
import org.apache.poi.hssf.record.TextObjectRecord;
|
|
||||||
import org.apache.poi.hssf.record.UnknownRecord;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A stream based way to get at complete records, with
|
|
||||||
* as low a memory footprint as possible.
|
|
||||||
* This handles reading from a RecordInputStream, turning
|
|
||||||
* the data into full records, processing continue records
|
|
||||||
* etc.
|
|
||||||
* Most users should use {@link HSSFEventFactory} /
|
|
||||||
* {@link HSSFListener} and have new records pushed to
|
|
||||||
* them, but this does allow for a "pull" style of coding.
|
|
||||||
*/
|
|
||||||
public class HSSFRecordStream {
|
|
||||||
private RecordInputStream in;
|
|
||||||
|
|
||||||
/** Have we run out of records on the stream? */
|
|
||||||
private boolean hitEOS = false;
|
|
||||||
/** Have we returned all the records there are? */
|
|
||||||
private boolean complete = false;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sometimes we end up with a bunch of
|
|
||||||
* records. When we do, these should
|
|
||||||
* be returned before the next normal
|
|
||||||
* record processing occurs (i.e. before
|
|
||||||
* we check for continue records and
|
|
||||||
* return rec)
|
|
||||||
*/
|
|
||||||
private Vector bonusRecords = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The next record to return, which may need to have its
|
|
||||||
* continue records passed to it before we do
|
|
||||||
*/
|
|
||||||
private Record rec = null;
|
|
||||||
/**
|
|
||||||
* The most recent record that we gave to the user
|
|
||||||
*/
|
|
||||||
private Record lastRec = null;
|
|
||||||
/**
|
|
||||||
* The most recent DrawingRecord seen
|
|
||||||
*/
|
|
||||||
private DrawingRecord lastDrawingRecord = new DrawingRecord();
|
|
||||||
|
|
||||||
public HSSFRecordStream(RecordInputStream inp) {
|
|
||||||
this.in = inp;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the next (complete) record from the
|
|
||||||
* stream, or null if there are no more.
|
|
||||||
*/
|
|
||||||
public Record nextRecord() {
|
|
||||||
Record r = null;
|
|
||||||
|
|
||||||
// Loop until we get something
|
|
||||||
while(r == null && !complete) {
|
|
||||||
// Are there any bonus records that we need to
|
|
||||||
// return?
|
|
||||||
r = getBonusRecord();
|
|
||||||
|
|
||||||
// If not, ask for the next real record
|
|
||||||
if(r == null) {
|
|
||||||
r = getNextRecord();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// All done
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If there are any "bonus" records, that should
|
|
||||||
* be returned before processing new ones,
|
|
||||||
* grabs the next and returns it.
|
|
||||||
* If not, returns null;
|
|
||||||
*/
|
|
||||||
private Record getBonusRecord() {
|
|
||||||
if(bonusRecords != null) {
|
|
||||||
Record r = (Record)bonusRecords.remove(0);
|
|
||||||
if(bonusRecords.size() == 0) {
|
|
||||||
bonusRecords = null;
|
|
||||||
}
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the next available record, or null if
|
|
||||||
* this pass didn't return a record that's
|
|
||||||
* suitable for returning (eg was a continue record).
|
|
||||||
*/
|
|
||||||
private Record getNextRecord() {
|
|
||||||
Record toReturn = null;
|
|
||||||
|
|
||||||
if(in.hasNextRecord()) {
|
|
||||||
// Grab our next record
|
|
||||||
in.nextRecord();
|
|
||||||
short sid = in.getSid();
|
|
||||||
|
|
||||||
//
|
|
||||||
// for some reasons we have to make the workbook to be at least 4096 bytes
|
|
||||||
// but if we have such workbook we fill the end of it with zeros (many zeros)
|
|
||||||
//
|
|
||||||
// it is not good:
|
|
||||||
// if the length( all zero records ) % 4 = 1
|
|
||||||
// e.g.: any zero record would be read as 4 bytes at once ( 2 - id and 2 - size ).
|
|
||||||
// And the last 1 byte will be read WRONG ( the id must be 2 bytes )
|
|
||||||
//
|
|
||||||
// So we should better to check if the sid is zero and not to read more data
|
|
||||||
// The zero sid shows us that rest of the stream data is a fake to make workbook
|
|
||||||
// certain size
|
|
||||||
//
|
|
||||||
if ( sid == 0 )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
|
|
||||||
// If we had a last record, and this one
|
|
||||||
// isn't a continue record, then pass
|
|
||||||
// it on to the listener
|
|
||||||
if ((rec != null) && (sid != ContinueRecord.sid))
|
|
||||||
{
|
|
||||||
// This last record ought to be returned
|
|
||||||
toReturn = rec;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If this record isn't a continue record,
|
|
||||||
// then build it up
|
|
||||||
if (sid != ContinueRecord.sid)
|
|
||||||
{
|
|
||||||
//System.out.println("creating "+sid);
|
|
||||||
Record[] recs = RecordFactory.createRecord(in);
|
|
||||||
|
|
||||||
// We know that the multiple record situations
|
|
||||||
// don't contain continue records, so just
|
|
||||||
// pass those on to the listener now
|
|
||||||
if (recs.length > 1) {
|
|
||||||
bonusRecords = new Vector(recs.length-1);
|
|
||||||
for (int k = 0; k < (recs.length - 1); k++) {
|
|
||||||
bonusRecords.add(recs[k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Regardless of the number we created, always hold
|
|
||||||
// onto the last record to be processed on the next
|
|
||||||
// loop, in case it has any continue records
|
|
||||||
rec = recs[ recs.length - 1 ];
|
|
||||||
// Don't return it just yet though, as we probably have
|
|
||||||
// a record from the last round to return
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Normally, ContinueRecords are handled internally
|
|
||||||
// However, in a few cases, there is a gap between a record and
|
|
||||||
// its Continue, so we have to handle them specially
|
|
||||||
// This logic is much like in RecordFactory.createRecords()
|
|
||||||
Record[] recs = RecordFactory.createRecord(in);
|
|
||||||
ContinueRecord crec = (ContinueRecord)recs[0];
|
|
||||||
if((lastRec instanceof ObjRecord) || (lastRec instanceof TextObjectRecord)) {
|
|
||||||
// You can have Obj records between a DrawingRecord
|
|
||||||
// and its continue!
|
|
||||||
lastDrawingRecord.processContinueRecord( crec.getData() );
|
|
||||||
// Trigger them on the drawing record, now it's complete
|
|
||||||
rec = lastDrawingRecord;
|
|
||||||
}
|
|
||||||
else if((lastRec instanceof DrawingGroupRecord)) {
|
|
||||||
((DrawingGroupRecord)lastRec).processContinueRecord(crec.getData());
|
|
||||||
// Trigger them on the drawing record, now it's complete
|
|
||||||
rec = lastRec;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (rec instanceof UnknownRecord) {
|
|
||||||
;//silently skip records we don't know about
|
|
||||||
} else {
|
|
||||||
throw new RecordFormatException("Records should handle ContinueRecord internally. Should not see this exception");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update our tracking of the last record
|
|
||||||
lastRec = rec;
|
|
||||||
if(rec instanceof DrawingRecord) {
|
|
||||||
lastDrawingRecord = (DrawingRecord)rec;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// No more records
|
|
||||||
hitEOS = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we've hit the end-of-stream, then
|
|
||||||
// finish off the last record and be done
|
|
||||||
if(hitEOS) {
|
|
||||||
complete = true;
|
|
||||||
|
|
||||||
// Return the last record if there was
|
|
||||||
// one, otherwise null
|
|
||||||
if(rec != null) {
|
|
||||||
toReturn = rec;
|
|
||||||
rec = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return toReturn;
|
|
||||||
}
|
|
||||||
}
|
|
@ -17,21 +17,14 @@
|
|||||||
|
|
||||||
package org.apache.poi.hssf.record;
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
|
import org.apache.poi.hssf.record.chart.*;
|
||||||
|
import org.apache.poi.hssf.record.pivottable.*;
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.lang.reflect.Modifier;
|
import java.lang.reflect.Modifier;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.poi.hssf.record.chart.*;
|
|
||||||
import org.apache.poi.hssf.record.pivottable.*;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Title: Record Factory<P>
|
* Title: Record Factory<P>
|
||||||
@ -259,7 +252,7 @@ public final class RecordFactory {
|
|||||||
return new Record[] { record, };
|
return new Record[] { record, };
|
||||||
}
|
}
|
||||||
|
|
||||||
static Record createSingleRecord(RecordInputStream in) {
|
public static Record createSingleRecord(RecordInputStream in) {
|
||||||
I_RecordCreator constructor = _recordCreatorsById.get(new Integer(in.getSid()));
|
I_RecordCreator constructor = _recordCreatorsById.get(new Integer(in.getSid()));
|
||||||
|
|
||||||
if (constructor == null) {
|
if (constructor == null) {
|
||||||
@ -273,7 +266,7 @@ public final class RecordFactory {
|
|||||||
* RK record is a slightly smaller alternative to NumberRecord
|
* RK record is a slightly smaller alternative to NumberRecord
|
||||||
* POI likes NumberRecord better
|
* POI likes NumberRecord better
|
||||||
*/
|
*/
|
||||||
private static NumberRecord convertToNumberRecord(RKRecord rk) {
|
public static NumberRecord convertToNumberRecord(RKRecord rk) {
|
||||||
NumberRecord num = new NumberRecord();
|
NumberRecord num = new NumberRecord();
|
||||||
|
|
||||||
num.setColumn(rk.getColumn());
|
num.setColumn(rk.getColumn());
|
||||||
@ -286,7 +279,7 @@ public final class RecordFactory {
|
|||||||
/**
|
/**
|
||||||
* Converts a {@link MulRKRecord} into an equivalent array of {@link NumberRecord}s
|
* Converts a {@link MulRKRecord} into an equivalent array of {@link NumberRecord}s
|
||||||
*/
|
*/
|
||||||
private static NumberRecord[] convertRKRecords(MulRKRecord mrk) {
|
public static NumberRecord[] convertRKRecords(MulRKRecord mrk) {
|
||||||
|
|
||||||
NumberRecord[] mulRecs = new NumberRecord[mrk.getNumColumns()];
|
NumberRecord[] mulRecs = new NumberRecord[mrk.getNumColumns()];
|
||||||
for (int k = 0; k < mrk.getNumColumns(); k++) {
|
for (int k = 0; k < mrk.getNumColumns(); k++) {
|
||||||
@ -374,109 +367,16 @@ public final class RecordFactory {
|
|||||||
* @exception RecordFormatException on error processing the InputStream
|
* @exception RecordFormatException on error processing the InputStream
|
||||||
*/
|
*/
|
||||||
public static List<Record> createRecords(InputStream in) throws RecordFormatException {
|
public static List<Record> createRecords(InputStream in) throws RecordFormatException {
|
||||||
|
|
||||||
List<Record> records = new ArrayList<Record>(NUM_RECORDS);
|
List<Record> records = new ArrayList<Record>(NUM_RECORDS);
|
||||||
|
|
||||||
RecordInputStream recStream = new RecordInputStream(in);
|
RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in));
|
||||||
DrawingRecord lastDrawingRecord = new DrawingRecord( );
|
recStream.setIncludeContinueRecords(true);
|
||||||
Record lastRecord = null;
|
|
||||||
/*
|
|
||||||
* How to recognise end of stream?
|
|
||||||
* In the best case, the underlying input stream (in) ends just after the last EOF record
|
|
||||||
* Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
|
|
||||||
* reliably use zeros for padding and if this were always the case, this code could just
|
|
||||||
* skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
|
|
||||||
* non-zero padding that is read OK by Excel (Excel also fixes the padding).
|
|
||||||
*
|
|
||||||
* So to properly detect the workbook end of stream, this code has to identify the last
|
|
||||||
* EOF record. This is not so easy because the workbook bof+eof pair do not bracket the
|
|
||||||
* whole stream. The worksheets follow the workbook, but it is not easy to tell how many
|
|
||||||
* sheet sub-streams should be present. Hence we are looking for an EOF record that is not
|
|
||||||
* immediately followed by a BOF record. One extra complication is that bof+eof sub-
|
|
||||||
* streams can be nested within worksheet streams and it's not clear in these cases what
|
|
||||||
* record might follow any EOF record. So we also need to keep track of the bof/eof
|
|
||||||
* nesting level.
|
|
||||||
*/
|
|
||||||
|
|
||||||
int bofDepth=0;
|
Record record;
|
||||||
boolean lastRecordWasEOFLevelZero = false;
|
while ((record = recStream.nextRecord())!=null) {
|
||||||
while (recStream.hasNextRecord()) {
|
|
||||||
recStream.nextRecord();
|
|
||||||
if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) {
|
|
||||||
// Normally InputStream (in) contains only zero padding after this point
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Record record = createSingleRecord(recStream);
|
|
||||||
lastRecordWasEOFLevelZero = false;
|
|
||||||
if (record instanceof BOFRecord) {
|
|
||||||
bofDepth++;
|
|
||||||
records.add(record);
|
records.add(record);
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (record instanceof EOFRecord) {
|
|
||||||
bofDepth--;
|
|
||||||
records.add(record);
|
|
||||||
if (bofDepth<1) {
|
|
||||||
lastRecordWasEOFLevelZero = true;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (record instanceof DBCellRecord) {
|
|
||||||
// Not needed by POI. Regenerated from scratch by POI when spreadsheet is written
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (record instanceof RKRecord) {
|
|
||||||
records.add(convertToNumberRecord((RKRecord) record));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (record instanceof MulRKRecord) {
|
|
||||||
addAll(records, convertRKRecords((MulRKRecord)record));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (record.getSid() == DrawingGroupRecord.sid
|
|
||||||
&& lastRecord instanceof DrawingGroupRecord) {
|
|
||||||
DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) lastRecord;
|
|
||||||
lastDGRecord.join((AbstractEscherHolderRecord) record);
|
|
||||||
} else if (record.getSid() == ContinueRecord.sid) {
|
|
||||||
ContinueRecord contRec = (ContinueRecord)record;
|
|
||||||
|
|
||||||
if (lastRecord instanceof ObjRecord || lastRecord instanceof TextObjectRecord) {
|
|
||||||
// Drawing records have a very strange continue behaviour.
|
|
||||||
//There can actually be OBJ records mixed between the continues.
|
|
||||||
lastDrawingRecord.processContinueRecord(contRec.getData() );
|
|
||||||
//we must remember the position of the continue record.
|
|
||||||
//in the serialization procedure the original structure of records must be preserved
|
|
||||||
records.add(record);
|
|
||||||
} else if (lastRecord instanceof DrawingGroupRecord) {
|
|
||||||
((DrawingGroupRecord)lastRecord).processContinueRecord(contRec.getData());
|
|
||||||
} else if (lastRecord instanceof UnknownRecord) {
|
|
||||||
//Gracefully handle records that we don't know about,
|
|
||||||
//that happen to be continued
|
|
||||||
records.add(record);
|
|
||||||
} else if (lastRecord instanceof EOFRecord) {
|
|
||||||
// This is really odd, but excel still sometimes
|
|
||||||
// outputs a file like this all the same
|
|
||||||
records.add(record);
|
|
||||||
} else {
|
|
||||||
throw new RecordFormatException("Unhandled Continue Record");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
lastRecord = record;
|
|
||||||
if (record instanceof DrawingRecord) {
|
|
||||||
lastDrawingRecord = (DrawingRecord) record;
|
|
||||||
}
|
|
||||||
records.add(record);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return records;
|
return records;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addAll(List<Record> destList, Record[] srcRecs) {
|
|
||||||
for (int i = 0; i < srcRecs.length; i++) {
|
|
||||||
destList.add(srcRecs[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
233
src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java
Executable file
233
src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java
Executable file
@ -0,0 +1,233 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
|
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
|
||||||
|
import org.apache.poi.hssf.eventusermodel.HSSFListener;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A stream based way to get at complete records, with
|
||||||
|
* as low a memory footprint as possible.
|
||||||
|
* This handles reading from a RecordInputStream, turning
|
||||||
|
* the data into full records, processing continue records
|
||||||
|
* etc.
|
||||||
|
* Most users should use {@link HSSFEventFactory} /
|
||||||
|
* {@link HSSFListener} and have new records pushed to
|
||||||
|
* them, but this does allow for a "pull" style of coding.
|
||||||
|
*/
|
||||||
|
public class RecordFactoryInputStream {
|
||||||
|
private final RecordInputStream recStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Have we returned all the records there are?
|
||||||
|
*/
|
||||||
|
private boolean complete = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sometimes we end up with a bunch of
|
||||||
|
* records. When we do, these should
|
||||||
|
* be returned before the next normal
|
||||||
|
* record processing occurs (i.e. before
|
||||||
|
* we check for continue records and
|
||||||
|
* return rec)
|
||||||
|
*/
|
||||||
|
private final LinkedList bonusRecords = new LinkedList();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The most recent record that we gave to the user
|
||||||
|
*/
|
||||||
|
private Record lastRecord = null;
|
||||||
|
/**
|
||||||
|
* The most recent DrawingRecord seen
|
||||||
|
*/
|
||||||
|
private DrawingRecord lastDrawingRecord = new DrawingRecord();
|
||||||
|
|
||||||
|
private int bofDepth = 0;
|
||||||
|
|
||||||
|
private boolean lastRecordWasEOFLevelZero = false;
|
||||||
|
|
||||||
|
private boolean includeContinueRecords = false;
|
||||||
|
|
||||||
|
public RecordFactoryInputStream(RecordInputStream inp) {
|
||||||
|
recStream = inp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next (complete) record from the
|
||||||
|
* stream, or null if there are no more.
|
||||||
|
*/
|
||||||
|
public Record nextRecord() {
|
||||||
|
Record r = null;
|
||||||
|
|
||||||
|
// Loop until we get something
|
||||||
|
while (r == null && !complete) {
|
||||||
|
// Are there any bonus records that we need to
|
||||||
|
// return?
|
||||||
|
r = getBonusRecord();
|
||||||
|
|
||||||
|
// If not, ask for the next real record
|
||||||
|
if (r == null) {
|
||||||
|
r = getNextRecord();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// All done
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If there are any "bonus" records, that should
|
||||||
|
* be returned before processing new ones,
|
||||||
|
* grabs the next and returns it.
|
||||||
|
* If not, returns null;
|
||||||
|
*/
|
||||||
|
private Record getBonusRecord() {
|
||||||
|
if (!bonusRecords.isEmpty()) {
|
||||||
|
return (Record) bonusRecords.removeFirst();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next available record, or null if
|
||||||
|
* this pass didn't return a record that's
|
||||||
|
* suitable for returning (eg was a continue record).
|
||||||
|
*/
|
||||||
|
private Record getNextRecord() {
|
||||||
|
/*
|
||||||
|
* How to recognise end of stream?
|
||||||
|
* In the best case, the underlying input stream (in) ends just after the last EOF record
|
||||||
|
* Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
|
||||||
|
* reliably use zeros for padding and if this were always the case, this code could just
|
||||||
|
* skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
|
||||||
|
* non-zero padding that is read OK by Excel (Excel also fixes the padding).
|
||||||
|
*
|
||||||
|
* So to properly detect the workbook end of stream, this code has to identify the last
|
||||||
|
* EOF record. This is not so easy because the worbook bof+eof pair do not bracket the
|
||||||
|
* whole stream. The worksheets follow the workbook, but it is not easy to tell how many
|
||||||
|
* sheet sub-streams should be present. Hence we are looking for an EOF record that is not
|
||||||
|
* immediately followed by a BOF record. One extra complication is that bof+eof sub-
|
||||||
|
* streams can be nested within worksheet streams and it's not clear in these cases what
|
||||||
|
* record might follow any EOF record. So we also need to keep track of the bof/eof
|
||||||
|
* nesting level.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (recStream.hasNextRecord()) {
|
||||||
|
// Grab our next record
|
||||||
|
recStream.nextRecord();
|
||||||
|
|
||||||
|
if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) {
|
||||||
|
// Normally InputStream (in) contains only zero padding after this point
|
||||||
|
complete = true;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
Record record = RecordFactory.createSingleRecord(recStream);
|
||||||
|
lastRecordWasEOFLevelZero = false;
|
||||||
|
|
||||||
|
if (record instanceof BOFRecord) {
|
||||||
|
bofDepth++;
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (record instanceof EOFRecord) {
|
||||||
|
bofDepth--;
|
||||||
|
if (bofDepth < 1) {
|
||||||
|
lastRecordWasEOFLevelZero = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (record instanceof DBCellRecord) {
|
||||||
|
// Not needed by POI. Regenerated from scratch by POI when spreadsheet is written
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (record instanceof RKRecord) {
|
||||||
|
return RecordFactory.convertToNumberRecord((RKRecord) record);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (record instanceof MulRKRecord) {
|
||||||
|
NumberRecord[] records = RecordFactory.convertRKRecords((MulRKRecord) record);
|
||||||
|
|
||||||
|
List<NumberRecord> list = Arrays.asList(records);
|
||||||
|
bonusRecords.addAll(list.subList(1, list.size()));
|
||||||
|
|
||||||
|
return records[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (record.getSid() == DrawingGroupRecord.sid
|
||||||
|
&& lastRecord instanceof DrawingGroupRecord) {
|
||||||
|
DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) lastRecord;
|
||||||
|
lastDGRecord.join((AbstractEscherHolderRecord) record);
|
||||||
|
return null;
|
||||||
|
} else if (record.getSid() == ContinueRecord.sid) {
|
||||||
|
ContinueRecord contRec = (ContinueRecord) record;
|
||||||
|
|
||||||
|
if (lastRecord instanceof ObjRecord || lastRecord instanceof TextObjectRecord) {
|
||||||
|
// Drawing records have a very strange continue behaviour.
|
||||||
|
//There can actually be OBJ records mixed between the continues.
|
||||||
|
lastDrawingRecord.processContinueRecord(contRec.getData());
|
||||||
|
//we must remember the position of the continue record.
|
||||||
|
//in the serialization procedure the original structure of records must be preserved
|
||||||
|
if (includeContinueRecords) {
|
||||||
|
return record;
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
} else if (lastRecord instanceof DrawingGroupRecord) {
|
||||||
|
((DrawingGroupRecord) lastRecord).processContinueRecord(contRec.getData());
|
||||||
|
return null;
|
||||||
|
} else if (lastRecord instanceof UnknownRecord) {
|
||||||
|
//Gracefully handle records that we don't know about,
|
||||||
|
//that happen to be continued
|
||||||
|
return record;
|
||||||
|
} else if (lastRecord instanceof EOFRecord) {
|
||||||
|
// This is really odd, but excel still sometimes
|
||||||
|
// outputs a file like this all the same
|
||||||
|
return record;
|
||||||
|
} else {
|
||||||
|
throw new RecordFormatException("Unhandled Continue Record");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
lastRecord = record;
|
||||||
|
if (record instanceof DrawingRecord) {
|
||||||
|
lastDrawingRecord = (DrawingRecord) record;
|
||||||
|
}
|
||||||
|
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// No more records
|
||||||
|
complete = true;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return or not ContinueRecord in nextRecord
|
||||||
|
*/
|
||||||
|
public void setIncludeContinueRecords(boolean includeContinueRecords) {
|
||||||
|
this.includeContinueRecords = includeContinueRecords;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user