From 17b050c66fbc65964979db985b9b9dd9a3cff8d4 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sun, 30 Nov 2014 01:20:16 +0000 Subject: [PATCH] Largely there with the Excel 4 extractor, for TIKA-1490 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642493 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/hssf/record/OldCellRecord.java | 111 ++++++++++++++++++ .../poi/hssf/record/OldFormulaRecord.java | 45 ++----- .../poi/hssf/record/OldLabelRecord.java | 86 ++------------ .../hssf/extractor/TestOldExcelExtractor.java | 7 +- 4 files changed, 132 insertions(+), 117 deletions(-) create mode 100644 src/java/org/apache/poi/hssf/record/OldCellRecord.java diff --git a/src/java/org/apache/poi/hssf/record/OldCellRecord.java b/src/java/org/apache/poi/hssf/record/OldCellRecord.java new file mode 100644 index 000000000..06a6b4d53 --- /dev/null +++ b/src/java/org/apache/poi/hssf/record/OldCellRecord.java @@ -0,0 +1,111 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hssf.record; + +import org.apache.poi.util.HexDump; + +/** + * Base class for all old (Biff 2 - Biff 4) cell value records + * (note: these do not implement {@link CellValueRecordInterface}). + * Subclasses are expected to manage the cell data values (of various types). + */ +public abstract class OldCellRecord { + private short sid; + private boolean isBiff2; + private int field_1_row; + private short field_2_column; + private int field_3_cell_attrs; // Biff 2 + private short field_3_xf_index; // Biff 3+ + + protected OldCellRecord(RecordInputStream in, boolean isBiff2) { + this.sid = in.getSid(); + this.isBiff2 = isBiff2; + field_1_row = in.readUShort(); + field_2_column = in.readShort(); + + if (isBiff2) { + field_3_cell_attrs = in.readUShort() << 8; + field_3_cell_attrs += in.readUByte(); + } else { + field_3_xf_index = in.readShort(); + } + } + + public final int getRow() { + return field_1_row; + } + + public final short getColumn() { + return field_2_column; + } + + /** + * Gets the index to the ExtendedFormat; only read for non-Biff2 records + * + * @see org.apache.poi.hssf.record.ExtendedFormatRecord + * @return index to the XF record + */ + public final short getXFIndex() { + return field_3_xf_index; + } + public int getCellAttrs() + { + return field_3_cell_attrs; + } + + /** + * Is this a Biff2 record, or newer? 
+ */ + public boolean isBiff2() { + return isBiff2; + } + public short getSid() { + return sid; + } + + @Override + public final String toString() { + StringBuilder sb = new StringBuilder(); + String recordName = getRecordName(); + + sb.append("[").append(recordName).append("]\n"); + sb.append(" .row = ").append(HexDump.shortToHex(getRow())).append("\n"); + sb.append(" .col = ").append(HexDump.shortToHex(getColumn())).append("\n"); + if (isBiff2()) { + sb.append(" .cellattrs = ").append(HexDump.shortToHex(getCellAttrs())).append("\n"); + } else { + sb.append(" .xfindex = ").append(HexDump.shortToHex(getXFIndex())).append("\n"); + } + appendValueText(sb); + sb.append("\n"); + sb.append("[/").append(recordName).append("]\n"); + return sb.toString(); + } + + /** + * Append specific debug info (used by {@link #toString()}) for the value + * contained in this record. Trailing new-line should not be appended + * (superclass does that). + */ + protected abstract void appendValueText(StringBuilder sb); + + /** + * Gets the debug info BIFF record type name (used by {@link #toString()}). 
+ */ + protected abstract String getRecordName(); +} diff --git a/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java b/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java index 3be21f535..eb5335e93 100644 --- a/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java +++ b/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java @@ -24,36 +24,23 @@ import org.apache.poi.ss.formula.ptg.Ptg; * Formula Record (0x0006 / 0x0206 / 0x0406) - holds a formula in * encoded form, along with the value if a number */ -public final class OldFormulaRecord { +public final class OldFormulaRecord extends OldCellRecord { public final static short biff2_sid = 0x0006; public final static short biff3_sid = 0x0206; public final static short biff4_sid = 0x0406; public final static short biff5_sid = 0x0006; - private short sid; - private int field_1_row; - private short field_2_column; - private int field_3_cell_attrs; // Biff 2 - private short field_3_xf_index; // Biff 3+ private double field_4_value; private short field_5_options; private Formula field_6_parsed_expr; public OldFormulaRecord(RecordInputStream ris) { - field_1_row = ris.readUShort(); - field_2_column = ris.readShort(); - - if (ris.getSid() == biff2_sid) { - field_3_cell_attrs = ris.readUShort() << 8; - field_3_cell_attrs += ris.readUByte(); - } else { - field_3_xf_index = ris.readShort(); - } + super(ris, ris.getSid() == biff2_sid); // TODO Handle special cached values, for Biff 3+ field_4_value = ris.readDouble(); - if (ris.getSid() == biff2_sid) { + if (isBiff2()) { field_5_options = (short)ris.readUByte(); } else { field_5_options = ris.readShort(); @@ -64,25 +51,6 @@ public final class OldFormulaRecord { field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable); } - public int getRow() - { - return field_1_row; - } - - public short getColumn() - { - return field_2_column; - } - - public short getXFIndex() - { - return field_3_xf_index; - } - public int getCellAttrs() - { - return 
field_3_cell_attrs; - } - /** * get the calculated value of the formula * @@ -112,7 +80,10 @@ public final class OldFormulaRecord { return field_6_parsed_expr; } - public short getSid() { - return sid; + protected void appendValueText(StringBuilder sb) { + sb.append(" .value = ").append(getValue()).append("\n"); + } + protected String getRecordName() { + return "Old Formula"; } } diff --git a/src/java/org/apache/poi/hssf/record/OldLabelRecord.java b/src/java/org/apache/poi/hssf/record/OldLabelRecord.java index 97d6bef96..7c9aaace1 100644 --- a/src/java/org/apache/poi/hssf/record/OldLabelRecord.java +++ b/src/java/org/apache/poi/hssf/record/OldLabelRecord.java @@ -26,37 +26,26 @@ import org.apache.poi.util.POILogger; * strings stored directly in the cell, from the older file formats that * didn't use {@link LabelSSTRecord} */ -public final class OldLabelRecord extends Record implements CellValueRecordInterface { +public final class OldLabelRecord extends OldCellRecord { private final static POILogger logger = POILogFactory.getLogger(OldLabelRecord.class); public final static short biff2_sid = 0x0004; public final static short biff345_sid = 0x0204; - private short sid; - private int field_1_row; - private short field_2_column; - private int field_3_cell_attrs; // Biff 2 - private short field_3_xf_index; // Biff 3+ - private short field_4_string_len; - private byte[] field_5_bytes; - //private XXXXX codepage; // TODO Implement for this and OldStringRecord + private short field_4_string_len; + private byte[] field_5_bytes; + //private XXXXX codepage; // TODO Implement for this and OldStringRecord /** * @param in the RecordInputstream to read the record from */ public OldLabelRecord(RecordInputStream in) { - sid = in.getSid(); + super(in, in.getSid() == biff2_sid); - field_1_row = in.readUShort(); - field_2_column = in.readShort(); - - if (in.getSid() == biff2_sid) { - field_3_cell_attrs = in.readUShort() << 8; - field_3_cell_attrs += in.readUByte(); + if (isBiff2()) { 
field_4_string_len = (short)in.readUByte(); } else { - field_3_xf_index = in.readShort(); field_4_string_len = in.readShort(); } @@ -72,29 +61,6 @@ public final class OldLabelRecord extends Record implements CellValueRecordInter } } - public boolean isBiff2() { - return sid == biff2_sid; - } - - public int getRow() - { - return field_1_row; - } - - public short getColumn() - { - return field_2_column; - } - - public short getXFIndex() - { - return field_3_xf_index; - } - public int getCellAttrs() - { - return field_3_cell_attrs; - } - /** * get the number of characters this string contains * @return number of characters @@ -123,46 +89,12 @@ public final class OldLabelRecord extends Record implements CellValueRecordInter throw new RecordFormatException("Old Label Records are supported READ ONLY"); } - public short getSid() - { - return sid; - } - - public String toString() - { - StringBuffer sb = new StringBuffer(); - sb.append("[OLD LABEL]\n"); - sb.append(" .row = ").append(HexDump.shortToHex(getRow())).append("\n"); - sb.append(" .column = ").append(HexDump.shortToHex(getColumn())).append("\n"); - if (isBiff2()) { - sb.append(" .cellattrs = ").append(HexDump.shortToHex(getCellAttrs())).append("\n"); - } else { - sb.append(" .xfindex = ").append(HexDump.shortToHex(getXFIndex())).append("\n"); - } + protected void appendValueText(StringBuilder sb) { sb.append(" .string_len= ").append(HexDump.shortToHex(field_4_string_len)).append("\n"); sb.append(" .value = ").append(getValue()).append("\n"); - sb.append("[/OLD LABEL]\n"); - return sb.toString(); } - /** - * NO-OP! - */ - public void setColumn(short col) - { - } - - /** - * NO-OP! - */ - public void setRow(int row) - { - } - - /** - * no op! 
- */ - public void setXFIndex(short xf) - { + protected String getRecordName() { + return "OLD LABEL"; } } diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java index b83d8942d..f6c36e16c 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java @@ -80,11 +80,12 @@ public final class TestOldExcelExtractor extends TestCase { assertTrue(text, text.contains("784")); // Numbers which come from formulas - // TODO -// assertTrue(text, text.contains("0.40")); -// assertTrue(text, text.contains("624")); + assertTrue(text, text.contains("0.398")); // TODO Rounding + assertTrue(text, text.contains("624")); // Formatted numbers // TODO +// assertTrue(text, text.contains("55,624")); +// assertTrue(text, text.contains("11,743,477")); } }