Fix for bug 46987 - allow RecordFactory to handle non-zero padding at the end of the workbook stream
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@765866 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
10ee56f313
commit
f41239ad03
@ -37,6 +37,7 @@
|
||||
|
||||
<!-- Don't forget to update status.xml too! -->
|
||||
<release version="3.5-beta6" date="2009-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">46987 - Allow RecordFactory to handle non-zero padding at the end of the workbook stream</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47034 - Fix reading the name of a NameRecord when the name is very long</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47001 - Fixed WriteAccessRecord and LinkTable to handle unusual format written by Google Docs</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">46973 - Fixed defined names to behave better when refersToFormula is unset</action>
|
||||
|
@ -34,6 +34,7 @@
|
||||
<!-- Don't forget to update changes.xml too! -->
|
||||
<changes>
|
||||
<release version="3.5-beta6" date="2009-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">46987 - Allow RecordFactory to handle non-zero padding at the end of the workbook stream</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47034 - Fix reading the name of a NameRecord when the name is very long</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47001 - Fixed WriteAccessRecord and LinkTable to handle unusual format written by Google Docs</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">46973 - Fixed defined names to behave better when refersToFormula is unset</action>
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.poi.hssf.record;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
@ -32,6 +33,7 @@ import java.util.Set;
|
||||
|
||||
import org.apache.poi.hssf.record.chart.*;
|
||||
import org.apache.poi.hssf.record.pivottable.*;
|
||||
import org.apache.poi.util.HexDump;
|
||||
|
||||
/**
|
||||
* Title: Record Factory<P>
|
||||
@ -348,6 +350,22 @@ public final class RecordFactory {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Reads the supplied stream to its end, printing the hex form of every non-zero byte to
 * {@code System.err}, and prints a warning when the number of bytes read differs from
 * {@code avail}. Nothing is returned or thrown for a mismatch; the output is diagnostic only.
 *
 * @param in stream that is consumed until {@code read()} returns a negative value
 * @param avail number of bytes the caller expects to be left in {@code in}
 * @throws IOException if reading from {@code in} fails
 */
private static void checkZeros(InputStream in, int avail) throws IOException {
|
||||
int count=0;
|
||||
while(true) {
|
||||
int b = in.read();
|
||||
// a negative value from read() marks the end of the stream
if (b < 0) {
|
||||
break;
|
||||
}
|
||||
// only non-zero bytes are echoed; they are printed back to back without a separator
if (b!=0) {
|
||||
System.err.print(HexDump.byteToHex(b));
|
||||
}
|
||||
// every byte read is counted, whether zero or not
count++;
|
||||
}
|
||||
// report when the caller's expected byte count does not match what was actually read
if (avail != count) {
|
||||
System.err.println("avail!=count (" + avail + "!=" + count + ").");
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Create an array of records from an input stream
|
||||
*
|
||||
@ -364,13 +382,47 @@ public final class RecordFactory {
|
||||
RecordInputStream recStream = new RecordInputStream(in);
|
||||
DrawingRecord lastDrawingRecord = new DrawingRecord( );
|
||||
Record lastRecord = null;
|
||||
/*
|
||||
* How to recognise end of stream?
|
||||
* In the best case, the underlying input stream (in) ends just after the last EOF record
|
||||
* Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
|
||||
* reliably use zeros for padding and if this were always the case, this code could just
|
||||
* skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
|
||||
* non-zero padding that is read OK by Excel (Excel also fixes the padding).
|
||||
*
|
||||
* So to properly detect the workbook end of stream, this code has to identify the last
|
||||
* EOF record. This is not so easy because the workbook bof+eof pair do not bracket the
|
||||
* whole stream. The worksheets follow the workbook, but it is not easy to tell how many
|
||||
* sheet sub-streams should be present. Hence we are looking for an EOF record that is not
|
||||
* immediately followed by a BOF record. One extra complication is that bof+eof sub-
|
||||
* streams can be nested within worksheet streams and it's not clear in these cases what
|
||||
* record might follow any EOF record. So we also need to keep track of the bof/eof
|
||||
* nesting level.
|
||||
*/
|
||||
|
||||
int bofDepth=0;
|
||||
boolean lastRecordWasEOFLevelZero = false;
|
||||
while (recStream.hasNextRecord()) {
|
||||
recStream.nextRecord();
|
||||
if (recStream.getSid() == 0) {
|
||||
// After EOF, Excel seems to pad block with zeros
|
||||
continue;
|
||||
if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) {
|
||||
// Normally InputStream (in) contains only zero padding after this point
|
||||
break;
|
||||
}
|
||||
Record record = createSingleRecord(recStream);
|
||||
lastRecordWasEOFLevelZero = false;
|
||||
if (record instanceof BOFRecord) {
|
||||
bofDepth++;
|
||||
records.add(record);
|
||||
continue;
|
||||
}
|
||||
if (record instanceof EOFRecord) {
|
||||
bofDepth--;
|
||||
records.add(record);
|
||||
if (bofDepth<1) {
|
||||
lastRecordWasEOFLevelZero = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (record instanceof DBCellRecord) {
|
||||
// Not needed by POI. Regenerated from scratch by POI when spreadsheet is written
|
||||
|
@ -19,11 +19,16 @@ package org.apache.poi.hssf.record;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.util.List;
|
||||
import java.util.Iterator;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.AssertionFailedError;
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.HexRead;
|
||||
|
||||
/**
|
||||
@ -63,8 +68,7 @@ public final class TestRecordFactory extends TestCase {
|
||||
assertEquals(5, bofRecord.getType());
|
||||
assertEquals(1536, bofRecord.getVersion());
|
||||
recType = MMSRecord.sid;
|
||||
data = new byte[]
|
||||
{
|
||||
data = new byte[] {
|
||||
0, 0
|
||||
};
|
||||
record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data));
|
||||
@ -201,7 +205,50 @@ public final class TestRecordFactory extends TestCase {
|
||||
assertTrue(Arrays.equals(data, ser));
|
||||
}
|
||||
|
||||
public static void main(String [] ignored_args) {
|
||||
junit.textui.TestRunner.run(TestRecordFactory.class);
|
||||
/**
 * Regression test for bug 46987: serializes two BOF/EOF pairs, appends four non-zero
 * "padding" bytes followed by 192 zero bytes, stores the result as a POIFS document and
 * checks that {@code RecordFactory.createRecords} returns exactly the four real records.
 * A buffer underrun with the specific message checked below is reported as the known bug.
 */
public void testNonZeroPadding_bug46987() {
|
||||
// two BOF/EOF pairs; the second BOF is created via createSheetBOF()
Record[] recs = {
|
||||
new BOFRecord(),
|
||||
EOFRecord.instance,
|
||||
BOFRecord.createSheetBOF(),
|
||||
EOFRecord.instance,
|
||||
};
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
for (int i = 0; i < recs.length; i++) {
|
||||
try {
|
||||
baos.write(recs[i].serialize());
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
//simulate the bad padding at the end of the workbook stream in attachment 23483 of bug 46987
|
||||
// NOTE(review): presumably these four bytes are parsed as a little-endian record header
// (sid 0x1100, length 0x0200 = 512), which would match the "requested 512 bytes" message
// checked further down - confirm against RecordInputStream
baos.write(0x00);
|
||||
baos.write(0x11);
|
||||
baos.write(0x00);
|
||||
baos.write(0x02);
|
||||
// 192 trailing zero bytes, the same count named in the underrun message checked below
for (int i = 0; i < 192; i++) {
|
||||
baos.write(0x00);
|
||||
}
|
||||
|
||||
|
||||
// store the bytes as a document named "dummy" and read them back through a POIFS stream
POIFSFileSystem fs = new POIFSFileSystem();
|
||||
InputStream is;
|
||||
try {
|
||||
fs.createDocument(new ByteArrayInputStream(baos.toByteArray()), "dummy");
|
||||
is = fs.getRoot().createDocumentInputStream("dummy");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
List<Record> outRecs;
|
||||
try {
|
||||
outRecs = RecordFactory.createRecords(is);
|
||||
} catch (RuntimeException e) {
|
||||
// the exact underrun message identifies bug 46987; any other runtime failure is rethrown as-is
// NOTE(review): getMessage() may be null for some exceptions, which would raise a
// NullPointerException here and hide the original failure - consider comparing the other way round
if (e.getMessage().equals("Buffer underrun - requested 512 bytes but 192 was available")) {
|
||||
throw new AssertionFailedError("Identified bug 46987");
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
// only the four serialized records are expected back; the padding must yield no records
assertEquals(4, outRecs.size());
|
||||
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user