Further Excel 4 text extractor support, for TIKA-1490
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642492 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
553964a455
commit
738c518474
@ -24,6 +24,7 @@ import java.io.InputStream;
|
|||||||
|
|
||||||
import org.apache.poi.hssf.record.FormulaRecord;
|
import org.apache.poi.hssf.record.FormulaRecord;
|
||||||
import org.apache.poi.hssf.record.NumberRecord;
|
import org.apache.poi.hssf.record.NumberRecord;
|
||||||
|
import org.apache.poi.hssf.record.OldFormulaRecord;
|
||||||
import org.apache.poi.hssf.record.OldLabelRecord;
|
import org.apache.poi.hssf.record.OldLabelRecord;
|
||||||
import org.apache.poi.hssf.record.OldStringRecord;
|
import org.apache.poi.hssf.record.OldStringRecord;
|
||||||
import org.apache.poi.hssf.record.RKRecord;
|
import org.apache.poi.hssf.record.RKRecord;
|
||||||
@ -100,15 +101,15 @@ public class OldExcelExtractor {
|
|||||||
text.append(nr.getValue());
|
text.append(nr.getValue());
|
||||||
text.append('\n');
|
text.append('\n');
|
||||||
break;
|
break;
|
||||||
/*
|
case OldFormulaRecord.biff2_sid:
|
||||||
case OldFormulaRecord.sid:
|
case OldFormulaRecord.biff3_sid:
|
||||||
FormulaRecord fr = new FormulaRecord(ris);
|
case OldFormulaRecord.biff4_sid:
|
||||||
System.out.println(fr.getCachedResultType());
|
OldFormulaRecord fr = new OldFormulaRecord(ris);
|
||||||
if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
|
// if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
|
||||||
text.append(fr.getValue());
|
text.append(fr.getValue());
|
||||||
text.append('\n');
|
text.append('\n');
|
||||||
}
|
// }
|
||||||
*/
|
break;
|
||||||
case RKRecord.sid:
|
case RKRecord.sid:
|
||||||
RKRecord rr = new RKRecord(ris);
|
RKRecord rr = new RKRecord(ris);
|
||||||
text.append(rr.getRKNumber());
|
text.append(rr.getRKNumber());
|
||||||
|
@ -36,7 +36,6 @@ import org.apache.poi.util.LittleEndianOutput;
|
|||||||
public final class FormulaRecord extends CellRecord {
|
public final class FormulaRecord extends CellRecord {
|
||||||
|
|
||||||
public static final short sid = 0x0006; // docs say 406...because of a bug Microsoft support site article #Q184647)
|
public static final short sid = 0x0006; // docs say 406...because of a bug Microsoft support site article #Q184647)
|
||||||
public static final short olderSid = 0x0406; // older biff versions do manage 406!
|
|
||||||
private static int FIXED_SIZE = 14; // double + short + int
|
private static int FIXED_SIZE = 14; // double + short + int
|
||||||
|
|
||||||
private static final BitField alwaysCalc = BitFieldFactory.getInstance(0x0001);
|
private static final BitField alwaysCalc = BitFieldFactory.getInstance(0x0001);
|
||||||
|
118
src/java/org/apache/poi/hssf/record/OldFormulaRecord.java
Normal file
118
src/java/org/apache/poi/hssf/record/OldFormulaRecord.java
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
|
import org.apache.poi.ss.formula.Formula;
|
||||||
|
import org.apache.poi.ss.formula.ptg.Ptg;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Formula Record (0x0006 / 0x0206 / 0x0406) - holds a formula in
|
||||||
|
* encoded form, along with the value if a number
|
||||||
|
*/
|
||||||
|
public final class OldFormulaRecord {
|
||||||
|
public final static short biff2_sid = 0x0006;
|
||||||
|
public final static short biff3_sid = 0x0206;
|
||||||
|
public final static short biff4_sid = 0x0406;
|
||||||
|
public final static short biff5_sid = 0x0006;
|
||||||
|
|
||||||
|
private short sid;
|
||||||
|
private int field_1_row;
|
||||||
|
private short field_2_column;
|
||||||
|
private int field_3_cell_attrs; // Biff 2
|
||||||
|
private short field_3_xf_index; // Biff 3+
|
||||||
|
private double field_4_value;
|
||||||
|
private short field_5_options;
|
||||||
|
private Formula field_6_parsed_expr;
|
||||||
|
|
||||||
|
public OldFormulaRecord(RecordInputStream ris) {
|
||||||
|
field_1_row = ris.readUShort();
|
||||||
|
field_2_column = ris.readShort();
|
||||||
|
|
||||||
|
if (ris.getSid() == biff2_sid) {
|
||||||
|
field_3_cell_attrs = ris.readUShort() << 8;
|
||||||
|
field_3_cell_attrs += ris.readUByte();
|
||||||
|
} else {
|
||||||
|
field_3_xf_index = ris.readShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO Handle special cached values, for Biff 3+
|
||||||
|
field_4_value = ris.readDouble();
|
||||||
|
|
||||||
|
if (ris.getSid() == biff2_sid) {
|
||||||
|
field_5_options = (short)ris.readUByte();
|
||||||
|
} else {
|
||||||
|
field_5_options = ris.readShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
int expression_len = ris.readShort();
|
||||||
|
int nBytesAvailable = ris.available();
|
||||||
|
field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getRow()
|
||||||
|
{
|
||||||
|
return field_1_row;
|
||||||
|
}
|
||||||
|
|
||||||
|
public short getColumn()
|
||||||
|
{
|
||||||
|
return field_2_column;
|
||||||
|
}
|
||||||
|
|
||||||
|
public short getXFIndex()
|
||||||
|
{
|
||||||
|
return field_3_xf_index;
|
||||||
|
}
|
||||||
|
public int getCellAttrs()
|
||||||
|
{
|
||||||
|
return field_3_cell_attrs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the calculated value of the formula
|
||||||
|
*
|
||||||
|
* @return calculated value
|
||||||
|
*/
|
||||||
|
public double getValue() {
|
||||||
|
return field_4_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the option flags
|
||||||
|
*
|
||||||
|
* @return bitmask
|
||||||
|
*/
|
||||||
|
public short getOptions() {
|
||||||
|
return field_5_options;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the formula tokens. never <code>null</code>
|
||||||
|
*/
|
||||||
|
public Ptg[] getParsedExpression() {
|
||||||
|
return field_6_parsed_expr.getTokens();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Formula getFormula() {
|
||||||
|
return field_6_parsed_expr;
|
||||||
|
}
|
||||||
|
|
||||||
|
public short getSid() {
|
||||||
|
return sid;
|
||||||
|
}
|
||||||
|
}
|
@ -65,7 +65,7 @@ public final class TestOldExcelExtractor extends TestCase {
|
|||||||
assertTrue(text, text.contains("$100,000 or more"));
|
assertTrue(text, text.contains("$100,000 or more"));
|
||||||
assertTrue(text, text.contains("S corporation returns, Form 1120S [10,15]"));
|
assertTrue(text, text.contains("S corporation returns, Form 1120S [10,15]"));
|
||||||
// TODO Get these quotes working correctly
|
// TODO Get these quotes working correctly
|
||||||
// assertTrue(text, text.contains("individual income tax return “short forms.”"));
|
// assertTrue(text, text.contains("individual income tax return \u201Cshort forms.\u201D"));
|
||||||
|
|
||||||
// Formula based strings
|
// Formula based strings
|
||||||
// TODO Find some then test
|
// TODO Find some then test
|
||||||
|
Loading…
Reference in New Issue
Block a user