Bug 61045 -- allow for (and log!) extra bytes in FormatRecord and DimensionsRecord.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1799360 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fe5f8fb54c
commit
649280df8c
@ -20,6 +20,8 @@
|
|||||||
package org.apache.poi.hssf.record;
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
import org.apache.poi.util.LittleEndianOutput;
|
import org.apache.poi.util.LittleEndianOutput;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Title: Dimensions Record<P>
|
* Title: Dimensions Record<P>
|
||||||
@ -32,6 +34,9 @@ import org.apache.poi.util.LittleEndianOutput;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
public final class DimensionsRecord extends StandardRecord implements Cloneable {
|
public final class DimensionsRecord extends StandardRecord implements Cloneable {
|
||||||
|
|
||||||
|
private static final POILogger logger = POILogFactory.getLogger(DimensionsRecord.class);
|
||||||
|
|
||||||
public final static short sid = 0x200;
|
public final static short sid = 0x200;
|
||||||
private int field_1_first_row;
|
private int field_1_first_row;
|
||||||
private int field_2_last_row; // plus 1
|
private int field_2_last_row; // plus 1
|
||||||
@ -50,6 +55,11 @@ public final class DimensionsRecord extends StandardRecord implements Cloneable
|
|||||||
field_3_first_col = in.readShort();
|
field_3_first_col = in.readShort();
|
||||||
field_4_last_col = in.readShort();
|
field_4_last_col = in.readShort();
|
||||||
field_5_zero = in.readShort();
|
field_5_zero = in.readShort();
|
||||||
|
//POI-61045 -- in practice, there can be an extra 2 bytes
|
||||||
|
if (in.available() == 2) {
|
||||||
|
logger.log(POILogger.INFO, "DimensionsRecord has extra 2 bytes.");
|
||||||
|
in.readShort();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -18,7 +18,10 @@
|
|||||||
package org.apache.poi.hssf.record;
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
import org.apache.poi.util.HexDump;
|
import org.apache.poi.util.HexDump;
|
||||||
|
import org.apache.poi.util.LittleEndianConsts;
|
||||||
import org.apache.poi.util.LittleEndianOutput;
|
import org.apache.poi.util.LittleEndianOutput;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
import org.apache.poi.util.StringUtil;
|
import org.apache.poi.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -28,6 +31,9 @@ import org.apache.poi.util.StringUtil;
|
|||||||
* REFERENCE: PG 317 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)
|
* REFERENCE: PG 317 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)
|
||||||
*/
|
*/
|
||||||
public final class FormatRecord extends StandardRecord implements Cloneable {
|
public final class FormatRecord extends StandardRecord implements Cloneable {
|
||||||
|
|
||||||
|
private static final POILogger logger = POILogFactory.getLogger(FormatRecord.class);
|
||||||
|
|
||||||
public final static short sid = 0x041E;
|
public final static short sid = 0x041E;
|
||||||
|
|
||||||
private final int field_1_index_code;
|
private final int field_1_index_code;
|
||||||
@ -52,9 +58,9 @@ public final class FormatRecord extends StandardRecord implements Cloneable {
|
|||||||
field_3_hasMultibyte = (in.readByte() & 0x01) != 0;
|
field_3_hasMultibyte = (in.readByte() & 0x01) != 0;
|
||||||
|
|
||||||
if (field_3_hasMultibyte) {
|
if (field_3_hasMultibyte) {
|
||||||
field_4_formatstring = in.readUnicodeLEString(field_3_unicode_len);
|
field_4_formatstring = readStringCommon(in, field_3_unicode_len, false);
|
||||||
} else {
|
} else {
|
||||||
field_4_formatstring = in.readCompressedUnicode(field_3_unicode_len);
|
field_4_formatstring = readStringCommon(in, field_3_unicode_len, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,4 +119,55 @@ public final class FormatRecord extends StandardRecord implements Cloneable {
|
|||||||
public FormatRecord clone() {
|
public FormatRecord clone() {
|
||||||
return new FormatRecord(this);
|
return new FormatRecord(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String readStringCommon(RecordInputStream ris, int requestedLength, boolean pIsCompressedEncoding) {
|
||||||
|
//custom copy of ris.readUnicodeLEString to allow for extra bytes at the end
|
||||||
|
|
||||||
|
// Sanity check to detect garbage string lengths
|
||||||
|
if (requestedLength < 0 || requestedLength > 0x100000) { // 16 million chars?
|
||||||
|
throw new IllegalArgumentException("Bad requested string length (" + requestedLength + ")");
|
||||||
|
}
|
||||||
|
char[] buf = null;
|
||||||
|
boolean isCompressedEncoding = pIsCompressedEncoding;
|
||||||
|
int availableChars = isCompressedEncoding ? ris.remaining() : ris.remaining() / LittleEndianConsts.SHORT_SIZE;
|
||||||
|
//everything worked out. Great!
|
||||||
|
int remaining = ris.remaining();
|
||||||
|
if (requestedLength == availableChars) {
|
||||||
|
buf = new char[requestedLength];
|
||||||
|
} else {
|
||||||
|
//sometimes in older Excel 97 .xls files,
|
||||||
|
//the requested length is wrong.
|
||||||
|
//Read all available characters.
|
||||||
|
buf = new char[availableChars];
|
||||||
|
}
|
||||||
|
for (int i = 0; i < buf.length; i++) {
|
||||||
|
char ch;
|
||||||
|
if (isCompressedEncoding) {
|
||||||
|
ch = (char) ris.readUByte();
|
||||||
|
} else {
|
||||||
|
ch = (char) ris.readShort();
|
||||||
|
}
|
||||||
|
buf[i] = ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
//TIKA-2154's file shows that even in a unicode string
|
||||||
|
//there can be a remaining byte (without proper final '00')
|
||||||
|
//that should be read as a byte
|
||||||
|
if (ris.available() == 1) {
|
||||||
|
char[] tmp = new char[buf.length+1];
|
||||||
|
System.arraycopy(buf, 0, tmp, 0, buf.length);
|
||||||
|
tmp[buf.length] = (char)ris.readUByte();
|
||||||
|
buf = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ris.available() > 0) {
|
||||||
|
logger.log(POILogger.INFO, "FormatRecord has "+ris.available()+" unexplained bytes. Silently skipping");
|
||||||
|
//swallow what's left
|
||||||
|
while (ris.available() > 0) {
|
||||||
|
ris.readByte();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new String(buf);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -17,11 +17,11 @@
|
|||||||
|
|
||||||
package org.apache.poi.hssf.extractor;
|
package org.apache.poi.hssf.extractor;
|
||||||
|
|
||||||
|
import static org.apache.poi.POITestCase.assertContains;
|
||||||
|
import static org.apache.poi.POITestCase.assertStartsWith;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.apache.poi.POITestCase.assertContains;
|
|
||||||
import static org.apache.poi.POITestCase.assertStartsWith;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -388,4 +388,13 @@ public final class TestExcelExtractor {
|
|||||||
assertNotNull(extractor.getText());
|
assertNotNull(extractor.getText());
|
||||||
extractor.close();
|
extractor.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test61045() throws IOException {
|
||||||
|
//bug 61045. File is govdocs1 626534
|
||||||
|
ExcelExtractor extractor = createExtractor("61045_govdocs1_626534.xls");
|
||||||
|
String txt = extractor.getText();
|
||||||
|
assertContains(txt, "NONBUSINESS");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
BIN
test-data/spreadsheet/61045_govdocs1_626534.xls
Normal file
BIN
test-data/spreadsheet/61045_govdocs1_626534.xls
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user