From 738c5184744dd70e84e0a8ba6f7f58f20731552f Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Sun, 30 Nov 2014 01:03:24 +0000 Subject: [PATCH] Further Excel 4 text extractor support, for TIKA-1490 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642492 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hssf/extractor/OldExcelExtractor.java | 15 +-- .../apache/poi/hssf/record/FormulaRecord.java | 1 - .../poi/hssf/record/OldFormulaRecord.java | 118 ++++++++++++++++++ .../hssf/extractor/TestOldExcelExtractor.java | 2 +- 4 files changed, 127 insertions(+), 9 deletions(-) create mode 100644 src/java/org/apache/poi/hssf/record/OldFormulaRecord.java diff --git a/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java index e3705f159..af0fee01a 100644 --- a/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java @@ -24,6 +24,7 @@ import java.io.InputStream; import org.apache.poi.hssf.record.FormulaRecord; import org.apache.poi.hssf.record.NumberRecord; +import org.apache.poi.hssf.record.OldFormulaRecord; import org.apache.poi.hssf.record.OldLabelRecord; import org.apache.poi.hssf.record.OldStringRecord; import org.apache.poi.hssf.record.RKRecord; @@ -100,15 +101,15 @@ public class OldExcelExtractor { text.append(nr.getValue()); text.append('\n'); break; -/* - case OldFormulaRecord.sid: - FormulaRecord fr = new FormulaRecord(ris); -System.out.println(fr.getCachedResultType()); - if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) { + case OldFormulaRecord.biff2_sid: + case OldFormulaRecord.biff3_sid: + case OldFormulaRecord.biff4_sid: + OldFormulaRecord fr = new OldFormulaRecord(ris); +// if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) { text.append(fr.getValue()); text.append('\n'); - } -*/ +// } + break; case RKRecord.sid: RKRecord rr = new RKRecord(ris); text.append(rr.getRKNumber()); diff --git 
a/src/java/org/apache/poi/hssf/record/FormulaRecord.java b/src/java/org/apache/poi/hssf/record/FormulaRecord.java index ccd3cb041..c0a63d309 100644 --- a/src/java/org/apache/poi/hssf/record/FormulaRecord.java +++ b/src/java/org/apache/poi/hssf/record/FormulaRecord.java @@ -36,7 +36,6 @@ import org.apache.poi.util.LittleEndianOutput; public final class FormulaRecord extends CellRecord { public static final short sid = 0x0006; // docs say 406...because of a bug Microsoft support site article #Q184647) - public static final short olderSid = 0x0406; // older biff versions do manage 406! private static int FIXED_SIZE = 14; // double + short + int private static final BitField alwaysCalc = BitFieldFactory.getInstance(0x0001); diff --git a/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java b/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java new file mode 100644 index 000000000..3be21f535 --- /dev/null +++ b/src/java/org/apache/poi/hssf/record/OldFormulaRecord.java @@ -0,0 +1,118 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hssf.record; + +import org.apache.poi.ss.formula.Formula; +import org.apache.poi.ss.formula.ptg.Ptg; + +/** + * Formula Record (0x0006 / 0x0206 / 0x0406) - holds a formula in + * encoded form, along with its cached value, which is read as a double + */ +public final class OldFormulaRecord { + public final static short biff2_sid = 0x0006; + public final static short biff3_sid = 0x0206; + public final static short biff4_sid = 0x0406; + public final static short biff5_sid = 0x0006; // same id as biff2_sid + + private short sid; // record id reported by the stream this record was read from + private int field_1_row; + private short field_2_column; + private int field_3_cell_attrs; // Biff 2 + private short field_3_xf_index; // Biff 3+ + private double field_4_value; + private short field_5_options; + private Formula field_6_parsed_expr; + + public OldFormulaRecord(RecordInputStream ris) { + field_1_row = ris.readUShort(); + field_2_column = ris.readShort(); + sid = ris.getSid(); // keep the record id so getSid() reports the variant that was read + if (ris.getSid() == biff2_sid) { + field_3_cell_attrs = ris.readUShort() << 8; // three attribute bytes: a ushort shifted up 8 bits... + field_3_cell_attrs += ris.readUByte(); // ...plus a ubyte in the low 8 bits + } else { + field_3_xf_index = ris.readShort(); + } + + // TODO Handle special cached values, for Biff 3+ + field_4_value = ris.readDouble(); + + if (ris.getSid() == biff2_sid) { + field_5_options = (short)ris.readUByte(); // a single byte of options for this record id + } else { + field_5_options = ris.readShort(); + } + + int expression_len = ris.readShort(); + int nBytesAvailable = ris.available(); + field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable); + } + + public int getRow() + { + return field_1_row; + } + + public short getColumn() + { + return field_2_column; + } + + public short getXFIndex() + { + return field_3_xf_index; + } + public int getCellAttrs() + { + return field_3_cell_attrs; + } + + /** + * get the calculated value of the formula + * + * @return calculated value + */ + public double getValue() { + return field_4_value; + } + + /** + * get the option flags + * + * @return bitmask + */ + public short
getOptions() { + return field_5_options; + } + + /** + * @return the formula tokens. never null + */ + public Ptg[] getParsedExpression() { + return field_6_parsed_expr.getTokens(); + } + + public Formula getFormula() { + return field_6_parsed_expr; + } + + public short getSid() { + return sid; + } +} diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java index fd057cd63..b83d8942d 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java @@ -65,7 +65,7 @@ public final class TestOldExcelExtractor extends TestCase { assertTrue(text, text.contains("$100,000 or more")); assertTrue(text, text.contains("S corporation returns, Form 1120S [10,15]")); // TODO Get these quotes working correctly -// assertTrue(text, text.contains("individual income tax return “short forms.”")); +// assertTrue(text, text.contains("individual income tax return \u201Cshort forms.\u201D")); // Formula based strings // TODO Find some then test