Largely there with the Excel 4 extractor, for TIKA-1490
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642493 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
738c518474
commit
17b050c66f
111
src/java/org/apache/poi/hssf/record/OldCellRecord.java
Normal file
111
src/java/org/apache/poi/hssf/record/OldCellRecord.java
Normal file
@ -0,0 +1,111 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hssf.record;
|
||||
|
||||
import org.apache.poi.util.HexDump;
|
||||
|
||||
/**
|
||||
* Base class for all old (Biff 2 - Biff 4) cell value records
|
||||
* (implementors of {@link CellValueRecordInterface}).
|
||||
* Subclasses are expected to manage the cell data values (of various types).
|
||||
*/
|
||||
public abstract class OldCellRecord {
|
||||
private short sid;
|
||||
private boolean isBiff2;
|
||||
private int field_1_row;
|
||||
private short field_2_column;
|
||||
private int field_3_cell_attrs; // Biff 2
|
||||
private short field_3_xf_index; // Biff 3+
|
||||
|
||||
protected OldCellRecord(RecordInputStream in, boolean isBiff2) {
|
||||
this.sid = in.getSid();
|
||||
this.isBiff2 = isBiff2;
|
||||
field_1_row = in.readUShort();
|
||||
field_2_column = in.readShort();
|
||||
|
||||
if (isBiff2) {
|
||||
field_3_cell_attrs = in.readUShort() << 8;
|
||||
field_3_cell_attrs += in.readUByte();
|
||||
} else {
|
||||
field_3_xf_index = in.readShort();
|
||||
}
|
||||
}
|
||||
|
||||
public final int getRow() {
|
||||
return field_1_row;
|
||||
}
|
||||
|
||||
public final short getColumn() {
|
||||
return field_2_column;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the index to the ExtendedFormat, for non-Biff2
|
||||
*
|
||||
* @see org.apache.poi.hssf.record.ExtendedFormatRecord
|
||||
* @return index to the XF record
|
||||
*/
|
||||
public final short getXFIndex() {
|
||||
return field_3_xf_index;
|
||||
}
|
||||
public int getCellAttrs()
|
||||
{
|
||||
return field_3_cell_attrs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this a Biff2 record, or newer?
|
||||
*/
|
||||
public boolean isBiff2() {
|
||||
return isBiff2;
|
||||
}
|
||||
public short getSid() {
|
||||
return sid;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String recordName = getRecordName();
|
||||
|
||||
sb.append("[").append(recordName).append("]\n");
|
||||
sb.append(" .row = ").append(HexDump.shortToHex(getRow())).append("\n");
|
||||
sb.append(" .col = ").append(HexDump.shortToHex(getColumn())).append("\n");
|
||||
if (isBiff2()) {
|
||||
sb.append(" .cellattrs = ").append(HexDump.shortToHex(getCellAttrs())).append("\n");
|
||||
} else {
|
||||
sb.append(" .xfindex = ").append(HexDump.shortToHex(getXFIndex())).append("\n");
|
||||
}
|
||||
appendValueText(sb);
|
||||
sb.append("\n");
|
||||
sb.append("[/").append(recordName).append("]\n");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Append specific debug info (used by {@link #toString()} for the value
|
||||
* contained in this record. Trailing new-line should not be appended
|
||||
* (superclass does that).
|
||||
*/
|
||||
protected abstract void appendValueText(StringBuilder sb);
|
||||
|
||||
/**
|
||||
* Gets the debug info BIFF record type name (used by {@link #toString()}.
|
||||
*/
|
||||
protected abstract String getRecordName();
|
||||
}
|
@ -24,36 +24,23 @@ import org.apache.poi.ss.formula.ptg.Ptg;
|
||||
* Formula Record (0x0006 / 0x0206 / 0x0406) - holds a formula in
|
||||
* encoded form, along with the value if a number
|
||||
*/
|
||||
public final class OldFormulaRecord {
|
||||
public final class OldFormulaRecord extends OldCellRecord {
|
||||
public final static short biff2_sid = 0x0006;
|
||||
public final static short biff3_sid = 0x0206;
|
||||
public final static short biff4_sid = 0x0406;
|
||||
public final static short biff5_sid = 0x0006;
|
||||
|
||||
private short sid;
|
||||
private int field_1_row;
|
||||
private short field_2_column;
|
||||
private int field_3_cell_attrs; // Biff 2
|
||||
private short field_3_xf_index; // Biff 3+
|
||||
private double field_4_value;
|
||||
private short field_5_options;
|
||||
private Formula field_6_parsed_expr;
|
||||
|
||||
public OldFormulaRecord(RecordInputStream ris) {
|
||||
field_1_row = ris.readUShort();
|
||||
field_2_column = ris.readShort();
|
||||
|
||||
if (ris.getSid() == biff2_sid) {
|
||||
field_3_cell_attrs = ris.readUShort() << 8;
|
||||
field_3_cell_attrs += ris.readUByte();
|
||||
} else {
|
||||
field_3_xf_index = ris.readShort();
|
||||
}
|
||||
super(ris, ris.getSid() == biff2_sid);
|
||||
|
||||
// TODO Handle special cached values, for Biff 3+
|
||||
field_4_value = ris.readDouble();
|
||||
|
||||
if (ris.getSid() == biff2_sid) {
|
||||
if (isBiff2()) {
|
||||
field_5_options = (short)ris.readUByte();
|
||||
} else {
|
||||
field_5_options = ris.readShort();
|
||||
@ -64,25 +51,6 @@ public final class OldFormulaRecord {
|
||||
field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
|
||||
}
|
||||
|
||||
public int getRow()
|
||||
{
|
||||
return field_1_row;
|
||||
}
|
||||
|
||||
public short getColumn()
|
||||
{
|
||||
return field_2_column;
|
||||
}
|
||||
|
||||
public short getXFIndex()
|
||||
{
|
||||
return field_3_xf_index;
|
||||
}
|
||||
public int getCellAttrs()
|
||||
{
|
||||
return field_3_cell_attrs;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the calculated value of the formula
|
||||
*
|
||||
@ -112,7 +80,10 @@ public final class OldFormulaRecord {
|
||||
return field_6_parsed_expr;
|
||||
}
|
||||
|
||||
public short getSid() {
|
||||
return sid;
|
||||
protected void appendValueText(StringBuilder sb) {
|
||||
sb.append(" .value = ").append(getValue()).append("\n");
|
||||
}
|
||||
protected String getRecordName() {
|
||||
return "Old Formula";
|
||||
}
|
||||
}
|
||||
|
@ -26,37 +26,26 @@ import org.apache.poi.util.POILogger;
|
||||
* strings stored directly in the cell, from the older file formats that
|
||||
* didn't use {@link LabelSSTRecord}
|
||||
*/
|
||||
public final class OldLabelRecord extends Record implements CellValueRecordInterface {
|
||||
public final class OldLabelRecord extends OldCellRecord {
|
||||
private final static POILogger logger = POILogFactory.getLogger(OldLabelRecord.class);
|
||||
|
||||
public final static short biff2_sid = 0x0004;
|
||||
public final static short biff345_sid = 0x0204;
|
||||
|
||||
private short sid;
|
||||
private int field_1_row;
|
||||
private short field_2_column;
|
||||
private int field_3_cell_attrs; // Biff 2
|
||||
private short field_3_xf_index; // Biff 3+
|
||||
private short field_4_string_len;
|
||||
private byte[] field_5_bytes;
|
||||
//private XXXXX codepage; // TODO Implement for this and OldStringRecord
|
||||
private short field_4_string_len;
|
||||
private byte[] field_5_bytes;
|
||||
//private XXXXX codepage; // TODO Implement for this and OldStringRecord
|
||||
|
||||
/**
|
||||
* @param in the RecordInputstream to read the record from
|
||||
*/
|
||||
public OldLabelRecord(RecordInputStream in)
|
||||
{
|
||||
sid = in.getSid();
|
||||
super(in, in.getSid() == biff2_sid);
|
||||
|
||||
field_1_row = in.readUShort();
|
||||
field_2_column = in.readShort();
|
||||
|
||||
if (in.getSid() == biff2_sid) {
|
||||
field_3_cell_attrs = in.readUShort() << 8;
|
||||
field_3_cell_attrs += in.readUByte();
|
||||
if (isBiff2()) {
|
||||
field_4_string_len = (short)in.readUByte();
|
||||
} else {
|
||||
field_3_xf_index = in.readShort();
|
||||
field_4_string_len = in.readShort();
|
||||
}
|
||||
|
||||
@ -72,29 +61,6 @@ public final class OldLabelRecord extends Record implements CellValueRecordInter
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isBiff2() {
|
||||
return sid == biff2_sid;
|
||||
}
|
||||
|
||||
public int getRow()
|
||||
{
|
||||
return field_1_row;
|
||||
}
|
||||
|
||||
public short getColumn()
|
||||
{
|
||||
return field_2_column;
|
||||
}
|
||||
|
||||
public short getXFIndex()
|
||||
{
|
||||
return field_3_xf_index;
|
||||
}
|
||||
public int getCellAttrs()
|
||||
{
|
||||
return field_3_cell_attrs;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the number of characters this string contains
|
||||
* @return number of characters
|
||||
@ -123,46 +89,12 @@ public final class OldLabelRecord extends Record implements CellValueRecordInter
|
||||
throw new RecordFormatException("Old Label Records are supported READ ONLY");
|
||||
}
|
||||
|
||||
public short getSid()
|
||||
{
|
||||
return sid;
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("[OLD LABEL]\n");
|
||||
sb.append(" .row = ").append(HexDump.shortToHex(getRow())).append("\n");
|
||||
sb.append(" .column = ").append(HexDump.shortToHex(getColumn())).append("\n");
|
||||
if (isBiff2()) {
|
||||
sb.append(" .cellattrs = ").append(HexDump.shortToHex(getCellAttrs())).append("\n");
|
||||
} else {
|
||||
sb.append(" .xfindex = ").append(HexDump.shortToHex(getXFIndex())).append("\n");
|
||||
}
|
||||
protected void appendValueText(StringBuilder sb) {
|
||||
sb.append(" .string_len= ").append(HexDump.shortToHex(field_4_string_len)).append("\n");
|
||||
sb.append(" .value = ").append(getValue()).append("\n");
|
||||
sb.append("[/OLD LABEL]\n");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* NO-OP!
|
||||
*/
|
||||
public void setColumn(short col)
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* NO-OP!
|
||||
*/
|
||||
public void setRow(int row)
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* no op!
|
||||
*/
|
||||
public void setXFIndex(short xf)
|
||||
{
|
||||
protected String getRecordName() {
|
||||
return "OLD LABEL";
|
||||
}
|
||||
}
|
||||
|
@ -80,11 +80,12 @@ public final class TestOldExcelExtractor extends TestCase {
|
||||
assertTrue(text, text.contains("784"));
|
||||
|
||||
// Numbers which come from formulas
|
||||
// TODO
|
||||
// assertTrue(text, text.contains("0.40"));
|
||||
// assertTrue(text, text.contains("624"));
|
||||
assertTrue(text, text.contains("0.398")); // TODO Rounding
|
||||
assertTrue(text, text.contains("624"));
|
||||
|
||||
// Formatted numbers
|
||||
// TODO
|
||||
// assertTrue(text, text.contains("55,624"));
|
||||
// assertTrue(text, text.contains("11,743,477"));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user