Largely there with the Excel 4 extractor, for TIKA-1490

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642493 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2014-11-30 01:20:16 +00:00
parent 738c518474
commit 17b050c66f
4 changed files with 132 additions and 117 deletions

View File

@ -0,0 +1,111 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record;
import org.apache.poi.util.HexDump;
/**
 * Base class for all old (Biff 2 - Biff 4) cell value records.
 * <p>
 * It reads and exposes the leading fields shared by those records: the row,
 * the column and then either the Biff 2 cell attributes or the Biff 3+ XF
 * index. Note that this class does <em>not</em> implement
 * {@link CellValueRecordInterface}; it is a read-only holder.
 * <p>
 * Subclasses are expected to manage the cell data values (of various types).
 */
public abstract class OldCellRecord {
    private final short sid;
    private final boolean isBiff2;
    private final int field_1_row;
    private final short field_2_column;
    private final int field_3_cell_attrs; // Biff 2
    private final short field_3_xf_index; // Biff 3+

    /**
     * Reads the common cell header from the stream, leaving the stream
     * positioned at the start of the record-specific value data.
     *
     * @param in the stream to read the record from
     * @param isBiff2 <code>true</code> if the record uses the Biff 2 layout
     *  (cell attributes), <code>false</code> for the Biff 3+ layout (XF index)
     */
    protected OldCellRecord(RecordInputStream in, boolean isBiff2) {
        this.sid = in.getSid();
        this.isBiff2 = isBiff2;
        field_1_row = in.readUShort();
        field_2_column = in.readShort();

        if (isBiff2) {
            // Three bytes of attributes: an unsigned short followed by an
            // unsigned byte, packed into one int (so up to 24 bits are used)
            int attrs = in.readUShort() << 8;
            attrs += in.readUByte();
            field_3_cell_attrs = attrs;
            field_3_xf_index = 0;
        } else {
            field_3_cell_attrs = 0;
            field_3_xf_index = in.readShort();
        }
    }

    /**
     * @return the row of the cell, as read from the record
     */
    public final int getRow() {
        return field_1_row;
    }

    /**
     * @return the column of the cell, as read from the record
     */
    public final short getColumn() {
        return field_2_column;
    }

    /**
     * get the index to the ExtendedFormat, for non-Biff2
     *
     * @see org.apache.poi.hssf.record.ExtendedFormatRecord
     * @return index to the XF record, or 0 for a Biff 2 record
     */
    public final short getXFIndex() {
        return field_3_xf_index;
    }

    /**
     * get the packed cell attributes, for Biff2
     *
     * @return the cell attributes, or 0 for a non-Biff2 record
     */
    public int getCellAttrs() {
        return field_3_cell_attrs;
    }

    /**
     * Is this a Biff2 record, or newer?
     */
    public boolean isBiff2() {
        return isBiff2;
    }

    /**
     * @return the record sid, as reported by the stream the record was read from
     */
    public short getSid() {
        return sid;
    }

    /**
     * Builds the debug representation: the record name, the common header
     * fields, then whatever the subclass adds via
     * {@link #appendValueText(StringBuilder)}.
     */
    @Override
    public final String toString() {
        StringBuilder sb = new StringBuilder();
        String recordName = getRecordName();

        sb.append("[").append(recordName).append("]\n");
        sb.append(" .row = ").append(HexDump.shortToHex(getRow())).append("\n");
        sb.append(" .col = ").append(HexDump.shortToHex(getColumn())).append("\n");
        if (isBiff2()) {
            // The attributes are up to 24 bits wide, so a 16-bit hex
            // rendering would drop the top byte; dump the full int instead
            sb.append(" .cellattrs = ").append(HexDump.intToHex(getCellAttrs())).append("\n");
        } else {
            sb.append(" .xfindex = ").append(HexDump.shortToHex(getXFIndex())).append("\n");
        }
        appendValueText(sb);
        sb.append("\n");
        sb.append("[/").append(recordName).append("]\n");
        return sb.toString();
    }

    /**
     * Append specific debug info (used by {@link #toString()}) for the value
     * contained in this record. Trailing new-line should not be appended
     * (superclass does that).
     */
    protected abstract void appendValueText(StringBuilder sb);

    /**
     * Gets the debug info BIFF record type name (used by {@link #toString()}).
     */
    protected abstract String getRecordName();
}

View File

@ -24,36 +24,23 @@ import org.apache.poi.ss.formula.ptg.Ptg;
* Formula Record (0x0006 / 0x0206 / 0x0406) - holds a formula in * Formula Record (0x0006 / 0x0206 / 0x0406) - holds a formula in
* encoded form, along with the value if a number * encoded form, along with the value if a number
*/ */
public final class OldFormulaRecord { public final class OldFormulaRecord extends OldCellRecord {
public final static short biff2_sid = 0x0006; public final static short biff2_sid = 0x0006;
public final static short biff3_sid = 0x0206; public final static short biff3_sid = 0x0206;
public final static short biff4_sid = 0x0406; public final static short biff4_sid = 0x0406;
public final static short biff5_sid = 0x0006; public final static short biff5_sid = 0x0006;
private short sid;
private int field_1_row;
private short field_2_column;
private int field_3_cell_attrs; // Biff 2
private short field_3_xf_index; // Biff 3+
private double field_4_value; private double field_4_value;
private short field_5_options; private short field_5_options;
private Formula field_6_parsed_expr; private Formula field_6_parsed_expr;
public OldFormulaRecord(RecordInputStream ris) { public OldFormulaRecord(RecordInputStream ris) {
field_1_row = ris.readUShort(); super(ris, ris.getSid() == biff2_sid);
field_2_column = ris.readShort();
if (ris.getSid() == biff2_sid) {
field_3_cell_attrs = ris.readUShort() << 8;
field_3_cell_attrs += ris.readUByte();
} else {
field_3_xf_index = ris.readShort();
}
// TODO Handle special cached values, for Biff 3+ // TODO Handle special cached values, for Biff 3+
field_4_value = ris.readDouble(); field_4_value = ris.readDouble();
if (ris.getSid() == biff2_sid) { if (isBiff2()) {
field_5_options = (short)ris.readUByte(); field_5_options = (short)ris.readUByte();
} else { } else {
field_5_options = ris.readShort(); field_5_options = ris.readShort();
@ -64,25 +51,6 @@ public final class OldFormulaRecord {
field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable); field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
} }
public int getRow()
{
return field_1_row;
}
public short getColumn()
{
return field_2_column;
}
public short getXFIndex()
{
return field_3_xf_index;
}
public int getCellAttrs()
{
return field_3_cell_attrs;
}
/** /**
* get the calculated value of the formula * get the calculated value of the formula
* *
@ -112,7 +80,10 @@ public final class OldFormulaRecord {
return field_6_parsed_expr; return field_6_parsed_expr;
} }
public short getSid() { protected void appendValueText(StringBuilder sb) {
return sid; sb.append(" .value = ").append(getValue()).append("\n");
}
protected String getRecordName() {
return "Old Formula";
} }
} }

View File

@ -26,37 +26,26 @@ import org.apache.poi.util.POILogger;
* strings stored directly in the cell, from the older file formats that * strings stored directly in the cell, from the older file formats that
* didn't use {@link LabelSSTRecord} * didn't use {@link LabelSSTRecord}
*/ */
public final class OldLabelRecord extends Record implements CellValueRecordInterface { public final class OldLabelRecord extends OldCellRecord {
private final static POILogger logger = POILogFactory.getLogger(OldLabelRecord.class); private final static POILogger logger = POILogFactory.getLogger(OldLabelRecord.class);
public final static short biff2_sid = 0x0004; public final static short biff2_sid = 0x0004;
public final static short biff345_sid = 0x0204; public final static short biff345_sid = 0x0204;
private short sid; private short field_4_string_len;
private int field_1_row; private byte[] field_5_bytes;
private short field_2_column; //private XXXXX codepage; // TODO Implement for this and OldStringRecord
private int field_3_cell_attrs; // Biff 2
private short field_3_xf_index; // Biff 3+
private short field_4_string_len;
private byte[] field_5_bytes;
//private XXXXX codepage; // TODO Implement for this and OldStringRecord
/** /**
* @param in the RecordInputstream to read the record from * @param in the RecordInputstream to read the record from
*/ */
public OldLabelRecord(RecordInputStream in) public OldLabelRecord(RecordInputStream in)
{ {
sid = in.getSid(); super(in, in.getSid() == biff2_sid);
field_1_row = in.readUShort(); if (isBiff2()) {
field_2_column = in.readShort();
if (in.getSid() == biff2_sid) {
field_3_cell_attrs = in.readUShort() << 8;
field_3_cell_attrs += in.readUByte();
field_4_string_len = (short)in.readUByte(); field_4_string_len = (short)in.readUByte();
} else { } else {
field_3_xf_index = in.readShort();
field_4_string_len = in.readShort(); field_4_string_len = in.readShort();
} }
@ -72,29 +61,6 @@ public final class OldLabelRecord extends Record implements CellValueRecordInter
} }
} }
public boolean isBiff2() {
return sid == biff2_sid;
}
public int getRow()
{
return field_1_row;
}
public short getColumn()
{
return field_2_column;
}
public short getXFIndex()
{
return field_3_xf_index;
}
public int getCellAttrs()
{
return field_3_cell_attrs;
}
/** /**
* get the number of characters this string contains * get the number of characters this string contains
* @return number of characters * @return number of characters
@ -123,46 +89,12 @@ public final class OldLabelRecord extends Record implements CellValueRecordInter
throw new RecordFormatException("Old Label Records are supported READ ONLY"); throw new RecordFormatException("Old Label Records are supported READ ONLY");
} }
public short getSid() protected void appendValueText(StringBuilder sb) {
{
return sid;
}
public String toString()
{
StringBuffer sb = new StringBuffer();
sb.append("[OLD LABEL]\n");
sb.append(" .row = ").append(HexDump.shortToHex(getRow())).append("\n");
sb.append(" .column = ").append(HexDump.shortToHex(getColumn())).append("\n");
if (isBiff2()) {
sb.append(" .cellattrs = ").append(HexDump.shortToHex(getCellAttrs())).append("\n");
} else {
sb.append(" .xfindex = ").append(HexDump.shortToHex(getXFIndex())).append("\n");
}
sb.append(" .string_len= ").append(HexDump.shortToHex(field_4_string_len)).append("\n"); sb.append(" .string_len= ").append(HexDump.shortToHex(field_4_string_len)).append("\n");
sb.append(" .value = ").append(getValue()).append("\n"); sb.append(" .value = ").append(getValue()).append("\n");
sb.append("[/OLD LABEL]\n");
return sb.toString();
} }
/** protected String getRecordName() {
* NO-OP! return "OLD LABEL";
*/
public void setColumn(short col)
{
}
/**
* NO-OP!
*/
public void setRow(int row)
{
}
/**
* no op!
*/
public void setXFIndex(short xf)
{
} }
} }

View File

@ -80,11 +80,12 @@ public final class TestOldExcelExtractor extends TestCase {
assertTrue(text, text.contains("784")); assertTrue(text, text.contains("784"));
// Numbers which come from formulas // Numbers which come from formulas
// TODO assertTrue(text, text.contains("0.398")); // TODO Rounding
// assertTrue(text, text.contains("0.40")); assertTrue(text, text.contains("624"));
// assertTrue(text, text.contains("624"));
// Formatted numbers // Formatted numbers
// TODO // TODO
// assertTrue(text, text.contains("55,624"));
// assertTrue(text, text.contains("11,743,477"));
} }
} }