Fix for bug 46987 - allow RecordFactory to handle non-zero padding at the end of the workbook stream

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@765866 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2009-04-17 06:53:49 +00:00
parent 10ee56f313
commit f41239ad03
4 changed files with 264 additions and 163 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.5-beta6" date="2009-??-??"> <release version="3.5-beta6" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="fix">46987 - Allow RecordFactory to handle non-zero padding at the end of the workbook stream</action>
<action dev="POI-DEVELOPERS" type="fix">47034 - Fix reading the name of a NameRecord when the name is very long</action> <action dev="POI-DEVELOPERS" type="fix">47034 - Fix reading the name of a NameRecord when the name is very long</action>
<action dev="POI-DEVELOPERS" type="fix">47001 - Fixed WriteAccessRecord and LinkTable to handle unusual format written by Google Docs</action> <action dev="POI-DEVELOPERS" type="fix">47001 - Fixed WriteAccessRecord and LinkTable to handle unusual format written by Google Docs</action>
<action dev="POI-DEVELOPERS" type="fix">46973 - Fixed defined names to behave better when refersToFormula is unset</action> <action dev="POI-DEVELOPERS" type="fix">46973 - Fixed defined names to behave better when refersToFormula is unset</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.5-beta6" date="2009-??-??"> <release version="3.5-beta6" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="fix">46987 - Allow RecordFactory to handle non-zero padding at the end of the workbook stream</action>
<action dev="POI-DEVELOPERS" type="fix">47034 - Fix reading the name of a NameRecord when the name is very long</action> <action dev="POI-DEVELOPERS" type="fix">47034 - Fix reading the name of a NameRecord when the name is very long</action>
<action dev="POI-DEVELOPERS" type="fix">47001 - Fixed WriteAccessRecord and LinkTable to handle unusual format written by Google Docs</action> <action dev="POI-DEVELOPERS" type="fix">47001 - Fixed WriteAccessRecord and LinkTable to handle unusual format written by Google Docs</action>
<action dev="POI-DEVELOPERS" type="fix">46973 - Fixed defined names to behave better when refersToFormula is unset</action> <action dev="POI-DEVELOPERS" type="fix">46973 - Fixed defined names to behave better when refersToFormula is unset</action>

View File

@ -17,6 +17,7 @@
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
@ -32,6 +33,7 @@ import java.util.Set;
import org.apache.poi.hssf.record.chart.*; import org.apache.poi.hssf.record.chart.*;
import org.apache.poi.hssf.record.pivottable.*; import org.apache.poi.hssf.record.pivottable.*;
import org.apache.poi.util.HexDump;
/** /**
* Title: Record Factory<P> * Title: Record Factory<P>
@ -348,6 +350,22 @@ public final class RecordFactory {
return result; return result;
} }
/**
 * Drains <tt>in</tt> to end of stream, reporting anything unexpected on
 * <tt>System.err</tt>: every non-zero byte is echoed (formatted with
 * {@link HexDump#byteToHex}), and a message is printed if the number of bytes
 * actually read differs from <tt>avail</tt>.
 *
 * @param in the stream to consume; it is read until <tt>read()</tt> returns a negative value
 * @param avail the number of bytes the caller expected to remain in <tt>in</tt>
 * @throws IOException if reading from <tt>in</tt> fails
 */
private static void checkZeros(InputStream in, int avail) throws IOException {
    int bytesRead = 0;
    // a negative return from read() marks end of stream
    for (int next = in.read(); next >= 0; next = in.read()) {
        if (next != 0) {
            System.err.print(HexDump.byteToHex(next));
        }
        bytesRead++;
    }
    if (bytesRead != avail) {
        System.err.println("avail!=count (" + avail + "!=" + bytesRead + ").");
    }
}
/** /**
* Create an array of records from an input stream * Create an array of records from an input stream
* *
@ -364,13 +382,47 @@ public final class RecordFactory {
RecordInputStream recStream = new RecordInputStream(in); RecordInputStream recStream = new RecordInputStream(in);
DrawingRecord lastDrawingRecord = new DrawingRecord( ); DrawingRecord lastDrawingRecord = new DrawingRecord( );
Record lastRecord = null; Record lastRecord = null;
/*
* How to recognise end of stream?
* In the best case, the underlying input stream (in) ends just after the last EOF record
* Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
* reliably use zeros for padding and if this were always the case, this code could just
* skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
* non-zero padding that is read OK by Excel (Excel also fixes the padding).
*
* So to properly detect the workbook end of stream, this code has to identify the last
* EOF record. This is not so easy because the workbook bof+eof pair does not bracket the
* whole stream. The worksheets follow the workbook, but it is not easy to tell how many
* sheet sub-streams should be present. Hence we are looking for an EOF record that is not
* immediately followed by a BOF record. One extra complication is that bof+eof sub-
* streams can be nested within worksheet streams and it's not clear in these cases what
* record might follow any EOF record. So we also need to keep track of the bof/eof
* nesting level.
*/
int bofDepth=0;
boolean lastRecordWasEOFLevelZero = false;
while (recStream.hasNextRecord()) { while (recStream.hasNextRecord()) {
recStream.nextRecord(); recStream.nextRecord();
if (recStream.getSid() == 0) { if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) {
// After EOF, Excel seems to pad block with zeros // Normally InputStream (in) contains only zero padding after this point
continue; break;
} }
Record record = createSingleRecord(recStream); Record record = createSingleRecord(recStream);
lastRecordWasEOFLevelZero = false;
if (record instanceof BOFRecord) {
bofDepth++;
records.add(record);
continue;
}
if (record instanceof EOFRecord) {
bofDepth--;
records.add(record);
if (bofDepth<1) {
lastRecordWasEOFLevelZero = true;
}
continue;
}
if (record instanceof DBCellRecord) { if (record instanceof DBCellRecord) {
// Not needed by POI. Regenerated from scratch by POI when spreadsheet is written // Not needed by POI. Regenerated from scratch by POI when spreadsheet is written

View File

@ -19,11 +19,16 @@ package org.apache.poi.hssf.record;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.util.List; import java.io.IOException;
import java.util.Iterator; import java.io.InputStream;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.HexRead; import org.apache.poi.util.HexRead;
/** /**
@ -63,8 +68,7 @@ public final class TestRecordFactory extends TestCase {
assertEquals(5, bofRecord.getType()); assertEquals(5, bofRecord.getType());
assertEquals(1536, bofRecord.getVersion()); assertEquals(1536, bofRecord.getVersion());
recType = MMSRecord.sid; recType = MMSRecord.sid;
data = new byte[] data = new byte[] {
{
0, 0 0, 0
}; };
record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data)); record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data));
@ -201,7 +205,50 @@ public final class TestRecordFactory extends TestCase {
assertTrue(Arrays.equals(data, ser)); assertTrue(Arrays.equals(data, ser));
} }
/**
 * Regression test for bug 46987: feeds RecordFactory.createRecords() a stream
 * made of two BOF/EOF pairs followed by padding that is not all zeros, and
 * expects exactly the four real records back.
 */
public static void main(String [] ignored_args) { public void testNonZeroPadding_bug46987() {
junit.textui.TestRunner.run(TestRecordFactory.class); Record[] recs = {
new BOFRecord(),
EOFRecord.instance,
BOFRecord.createSheetBOF(),
EOFRecord.instance,
};
// Serialize the records back to back into a single byte stream
ByteArrayOutputStream baos = new ByteArrayOutputStream();
for (int i = 0; i < recs.length; i++) {
try {
baos.write(recs[i].serialize());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
//simulate the bad padding at the end of the workbook stream in attachment 23483 of bug 46987
// Four bytes (00 11 00 02) that are not all zero, then 192 zero bytes
baos.write(0x00);
baos.write(0x11);
baos.write(0x00);
baos.write(0x02);
for (int i = 0; i < 192; i++) {
baos.write(0x00);
}
// Store the bytes as a document in an in-memory POIFS file system and read them
// back through a document input stream rather than the raw byte array
POIFSFileSystem fs = new POIFSFileSystem();
InputStream is;
try {
fs.createDocument(new ByteArrayInputStream(baos.toByteArray()), "dummy");
is = fs.getRoot().createDocumentInputStream("dummy");
} catch (IOException e) {
throw new RuntimeException(e);
}
List<Record> outRecs;
try {
outRecs = RecordFactory.createRecords(is);
} catch (RuntimeException e) {
// Presumably this exact message is how the failure showed up before the fix;
// it is reported as a test failure, any other exception is rethrown unchanged
if (e.getMessage().equals("Buffer underrun - requested 512 bytes but 192 was available")) {
throw new AssertionFailedError("Identified bug 46987");
}
throw e;
}
// The padding must not contribute any records beyond the four written above
assertEquals(4, outRecs.size());
} }
} }