diff --git a/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java index 3a08202a4..39a414327 100644 --- a/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java @@ -55,6 +55,7 @@ public class OldExcelExtractor { private RecordInputStream ris; private Closeable input; private int biffVersion; + private int fileType; public OldExcelExtractor(InputStream input) throws IOException { BufferedInputStream bstream = new BufferedInputStream(input, 8); @@ -83,6 +84,7 @@ public class OldExcelExtractor { private void open(InputStream biffStream) { input = biffStream; ris = new RecordInputStream(biffStream); + prepare(); } private void open(NPOIFSFileSystem fs) throws IOException { input = fs; @@ -95,6 +97,7 @@ public class OldExcelExtractor { } ris = new RecordInputStream(directory.createDocumentInputStream(book)); + prepare(); } public static void main(String[] args) throws Exception { @@ -106,16 +109,14 @@ public class OldExcelExtractor { OldExcelExtractor extractor = new OldExcelExtractor(new File(args[0])); System.out.println(extractor.getText()); } - - /** - * Retrieves the text contents of the file, as best we can - * for these old file formats - */ - public String getText() { - StringBuffer text = new StringBuffer(); + + private void prepare() { + if (! ris.hasNextRecord()) + throw new IllegalArgumentException("File contains no records!"); + ris.nextRecord(); // Work out what version we're dealing with - int bofSid = ris.getNextSid(); + int bofSid = ris.getSid(); switch (bofSid) { case BOFRecord.biff2_sid: biffVersion = 2; @@ -133,6 +134,33 @@ public class OldExcelExtractor { throw new IllegalArgumentException("File does not begin with a BOF, found sid of " + bofSid); } + // Get the type + BOFRecord bof = new BOFRecord(ris); + fileType = bof.getType(); + } + + /** + * The Biff version, largely corresponding to the Excel version + */ + public int getBiffVersion() { + return biffVersion; + } + /** + * The kind of the file, one of {@link BOFRecord#TYPE_WORKSHEET}, + * {@link BOFRecord#TYPE_CHART}, {@link BOFRecord#TYPE_EXCEL_4_MACRO} + * or {@link BOFRecord#TYPE_WORKSPACE_FILE} + */ + public int getFileType() { + return fileType; + } + + /** + * Retrieves the text contents of the file, as best we can + * for these old file formats + */ + public String getText() { + StringBuffer text = new StringBuffer(); + // To track formats and encodings CodepageRecord codepage = null; // TODO track the XFs and Format Strings diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java index f966cb10c..fdc53d531 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestOldExcelExtractor.java @@ -51,6 +51,10 @@ public final class TestOldExcelExtractor extends POITestCase { // Check we find a few numbers we expect in there assertContains(text, "11"); assertContains(text, "784"); + + // Check the type + assertEquals(4, extractor.getBiffVersion()); + assertEquals(0x10, extractor.getFileType()); } public void testSimpleExcel5() { for (String ver : new String[] {"5", "95"}) { @@ -69,6 +73,10 @@ public final class TestOldExcelExtractor extends POITestCase { // Check we got the sheet names (new formats only) assertContains(text, "Sheet: Feuil3"); + + // Check the type + assertEquals(5, extractor.getBiffVersion()); + assertEquals(0x05, extractor.getFileType()); } }