From bbe3ee9b4d6dffa8f29f49819a61c8c849574215 Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Sat, 3 May 2008 19:53:38 +0000 Subject: [PATCH] 44929 - Improved error handling in HSSFWorkbook when attempting to read a BIFF5 file git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@653117 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../poi/hssf/usermodel/HSSFWorkbook.java | 263 ++++++++++-------- 3 files changed, 143 insertions(+), 122 deletions(-) diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index ce08eff1a..fee702809 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 44929 - Improved error handling in HSSFWorkbook when attempting to read a BIFF5 file 44675 - Parameter operand classes (function metadata) required to encode SUM() etc properly. Added parse validation for number of parameters 44921 - allow Ptg.writeBytes() to be called on relative ref Ptgs (RefN* and AreaN*) 44914 - Fix/suppress warning message "WARN. Unread n bytes of record 0xNN" diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 7e4277acb..0b1c347b9 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 44929 - Improved error handling in HSSFWorkbook when attempting to read a BIFF5 file 44675 - Parameter operand classes (function metadata) required to encode SUM() etc properly. Added parse validation for number of parameters 44921 - allow Ptg.writeBytes() to be called on relative ref Ptgs (RefN* and AreaN*) 44914 - Fix/suppress warning message "WARN. Unread n bytes of record 0xNN" diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java index a76cfd159..78fe26013 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java @@ -165,9 +165,45 @@ public class HSSFWorkbook extends POIDocument public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes) throws IOException { - this(fs.getRoot(), fs, preserveNodes); + this(fs.getRoot(), fs, preserveNodes); } - + + /** + * Normally, the Workbook will be in a POIFS Stream + * called "Workbook". However, some weird XLS generators use "WORKBOOK" + */ + private static final String[] WORKBOOK_DIR_ENTRY_NAMES = { + "Workbook", // as per BIFF8 spec + "WORKBOOK", + }; + + + private static String getWorkbookDirEntryName(DirectoryNode directory) { + + String[] potentialNames = WORKBOOK_DIR_ENTRY_NAMES; + for (int i = 0; i < potentialNames.length; i++) { + String wbName = potentialNames[i]; + try { + directory.getEntry(wbName); + return wbName; + } catch (FileNotFoundException e) { + // continue - to try other options + } + } + + // check for previous version of file format + try { + directory.getEntry("Book"); + throw new IllegalArgumentException("The supplied spreadsheet seems to be Excel 5.0/7.0 (BIFF5) format. " + + "POI only supports BIFF8 format (from Excel versions 97/2000/XP/2003)"); + } catch (FileNotFoundException e) { + // fall through + } + + throw new IllegalArgumentException("The supplied POIFSFileSystem does not contain a BIFF8 'Workbook' entry. " + + "Is it really an excel file?"); + } + /** * given a POI POIFSFileSystem object, and a specific directory * within it, read in its Workbook and populate the high and @@ -185,9 +221,11 @@ public class HSSFWorkbook extends POIDocument public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserveNodes) throws IOException { - super(directory, fs); + super(directory, fs); + String workbookName = getWorkbookDirEntryName(directory); + this.preserveNodes = preserveNodes; - + // If we're not preserving nodes, don't track the // POIFS any more if(! preserveNodes) { @@ -197,28 +235,9 @@ public class HSSFWorkbook extends POIDocument sheets = new ArrayList(INITIAL_CAPACITY); names = new ArrayList(INITIAL_CAPACITY); - - // Normally, the Workbook will be in a POIFS Stream - // called "Workbook". However, some wierd XLS generators - // put theirs in one called "WORKBOOK" - String workbookName = "Workbook"; - try { - directory.getEntry(workbookName); - // Is the default name - } catch(FileNotFoundException fe) { - // Try the upper case form - try { - workbookName = "WORKBOOK"; - directory.getEntry(workbookName); - } catch(FileNotFoundException wfe) { - // Doesn't contain it in either form - throw new IllegalArgumentException("The supplied POIFSFileSystem contained neither a 'Workbook' entry, nor a 'WORKBOOK' entry. Is it really an excel file?"); - } - } - // Grab the data from the workbook stream, however - // it happens to be spelt. + // it happens to be spelled. InputStream stream = directory.createDocumentInputStream(workbookName); EventRecordFactory factory = new EventRecordFactory(); @@ -231,7 +250,7 @@ public class HSSFWorkbook extends POIDocument int sheetNum = 0; // convert all LabelRecord records to LabelSSTRecord - convertLabelRecords(records, recOffset); + convertLabelRecords(records, recOffset); while (recOffset < records.size()) { Sheet sheet = Sheet.createSheet(records, sheetNum++, recOffset ); @@ -288,7 +307,7 @@ public class HSSFWorkbook extends POIDocument // none currently } - + /** * This is basically a kludge to deal with the now obsolete Label records. If * you have to read in a sheet that contains Label records, be aware that the rest @@ -304,7 +323,7 @@ public class HSSFWorkbook extends POIDocument * @see org.apache.poi.hssf.record.LabelSSTRecord * @see org.apache.poi.hssf.record.SSTRecord */ - + private void convertLabelRecords(List records, int offset) { if (log.check( POILogger.DEBUG )) @@ -332,7 +351,7 @@ public class HSSFWorkbook extends POIDocument if (log.check( POILogger.DEBUG )) log.log(POILogger.DEBUG, "convertLabelRecords exit"); } - + /** * sets the order of appearance for a given sheet. @@ -345,7 +364,7 @@ public class HSSFWorkbook extends POIDocument sheets.add(pos,sheets.remove(getSheetIndex(sheetname))); workbook.setSheetOrder(sheetname, pos); } - + /** * sets the tab whose data is actually seen when the sheet is opened. * This may be different from the "selected sheet" since excel seems to @@ -357,7 +376,7 @@ public class HSSFWorkbook extends POIDocument public void setSelectedTab(short index) { workbook.getWindowOne().setSelectedTab(index); } - + /** * gets the tab whose data is actually seen when the sheet is opened. * This may be different from the "selected sheet" since excel seems to @@ -368,7 +387,7 @@ public class HSSFWorkbook extends POIDocument public short getSelectedTab() { return workbook.getWindowOne().getSelectedTab(); } - + /** * sets the first tab that is displayed in the list of tabs * in excel. @@ -377,7 +396,7 @@ public class HSSFWorkbook extends POIDocument public void setDisplayedTab(short index) { workbook.getWindowOne().setDisplayedTab(index); } - + /** * sets the first tab that is displayed in the list of tabs * in excel. @@ -399,7 +418,7 @@ public class HSSFWorkbook extends POIDocument /** - * set the sheet name. + * set the sheet name. * Will throw IllegalArgumentException if the name is greater than 31 chars * or contains /\?*[] * @param sheet number (0 based) @@ -413,19 +432,19 @@ public class HSSFWorkbook extends POIDocument { throw new RuntimeException("Sheet out of bounds"); } - + workbook.setSheetName( sheet, name); } - + /** * set the sheet name forcing the encoding. Forcing the encoding IS A BAD IDEA!!! * @deprecated 3-Jan-2006 POI now automatically detects unicode and sets the encoding - * appropriately. Simply use setSheetName(int sheet, String encoding) + * appropriately. Simply use setSheetName(int sheet, String encoding) * @throws IllegalArgumentException if the name is greater than 31 chars * or contains /\?*[] * @param sheet number (0 based) - */ + */ public void setSheetName( int sheet, String name, short encoding ) { if (workbook.doesContainsSheetName( name, sheet )) @@ -480,7 +499,7 @@ public class HSSFWorkbook extends POIDocument /** * Hide or unhide a sheet - * + * * @param sheetnum The sheet number * @param hidden True to mark the sheet as hidden, false otherwise */ @@ -492,7 +511,7 @@ public class HSSFWorkbook extends POIDocument } workbook.setSheetHidden(sheet,hidden); } - + /* * get the sheet's index * @param name sheet name @@ -516,23 +535,23 @@ public class HSSFWorkbook extends POIDocument */ public int getSheetIndex(HSSFSheet sheet) { - for(int i=0; i - * i.e. Reference = $A$1:$B$2 - * @param sheetIndex Zero-based sheet index (0 Represents the first sheet to keep consistent with java) - * @param reference Valid name Reference for the Print Area - */ - public void setPrintArea(int sheetIndex, String reference) - { - NameRecord name = workbook.getSpecificBuiltinRecord(NameRecord.BUILTIN_PRINT_AREA, sheetIndex+1); + /** + * Sets the printarea for the sheet provided + *

+ * i.e. Reference = $A$1:$B$2 + * @param sheetIndex Zero-based sheet index (0 Represents the first sheet to keep consistent with java) + * @param reference Valid name Reference for the Print Area + */ + public void setPrintArea(int sheetIndex, String reference) + { + NameRecord name = workbook.getSpecificBuiltinRecord(NameRecord.BUILTIN_PRINT_AREA, sheetIndex+1); - if (name == null) - name = workbook.createBuiltInName(NameRecord.BUILTIN_PRINT_AREA, sheetIndex+1); + if (name == null) + name = workbook.createBuiltInName(NameRecord.BUILTIN_PRINT_AREA, sheetIndex+1); //adding one here because 0 indicates a global named region; doesnt make sense for print areas - short externSheetIndex = getWorkbook().checkExternSheet(sheetIndex); - name.setExternSheetNumber(externSheetIndex); - name.setAreaReference(reference); + short externSheetIndex = getWorkbook().checkExternSheet(sheetIndex); + name.setExternSheetNumber(externSheetIndex); + name.setAreaReference(reference); - } + } - /** - * For the Convenience of Java Programmers maintaining pointers. - * @see #setPrintArea(int, String) - * @param sheetIndex Zero-based sheet index (0 = First Sheet) - * @param startColumn Column to begin printarea - * @param endColumn Column to end the printarea - * @param startRow Row to begin the printarea - * @param endRow Row to end the printarea - */ - public void setPrintArea(int sheetIndex, int startColumn, int endColumn, - int startRow, int endRow) { + /** + * For the Convenience of Java Programmers maintaining pointers. + * @see #setPrintArea(int, String) + * @param sheetIndex Zero-based sheet index (0 = First Sheet) + * @param startColumn Column to begin printarea + * @param endColumn Column to end the printarea + * @param startRow Row to begin the printarea + * @param endRow Row to end the printarea + */ + public void setPrintArea(int sheetIndex, int startColumn, int endColumn, + int startRow, int endRow) { - //using absolute references because they don't get copied and pasted anyway - CellReference cell = new CellReference(startRow, startColumn, true, true); - String reference = cell.formatAsString(); + //using absolute references because they don't get copied and pasted anyway + CellReference cell = new CellReference(startRow, startColumn, true, true); + String reference = cell.formatAsString(); - cell = new CellReference(endRow, endColumn, true, true); - reference = reference+":"+cell.formatAsString(); + cell = new CellReference(endRow, endColumn, true, true); + reference = reference+":"+cell.formatAsString(); - setPrintArea(sheetIndex, reference); - } + setPrintArea(sheetIndex, reference); + } - /** - * Retrieves the reference for the printarea of the specified sheet, the sheet name is appended to the reference even if it was not specified. - * @param sheetIndex Zero-based sheet index (0 Represents the first sheet to keep consistent with java) - * @return String Null if no print area has been defined - */ - public String getPrintArea(int sheetIndex) - { - NameRecord name = workbook.getSpecificBuiltinRecord(NameRecord.BUILTIN_PRINT_AREA, sheetIndex+1); - if (name == null) return null; - //adding one here because 0 indicates a global named region; doesnt make sense for print areas + /** + * Retrieves the reference for the printarea of the specified sheet, the sheet name is appended to the reference even if it was not specified. + * @param sheetIndex Zero-based sheet index (0 Represents the first sheet to keep consistent with java) + * @return String Null if no print area has been defined + */ + public String getPrintArea(int sheetIndex) + { + NameRecord name = workbook.getSpecificBuiltinRecord(NameRecord.BUILTIN_PRINT_AREA, sheetIndex+1); + if (name == null) return null; + //adding one here because 0 indicates a global named region; doesnt make sense for print areas - return name.getAreaReference(this); - } + return name.getAreaReference(this); + } /** * Delete the printarea for the sheet specified * @param sheetIndex Zero-based sheet index (0 = First Sheet) */ public void removePrintArea(int sheetIndex) { - getWorkbook().removeBuiltinRecord(NameRecord.BUILTIN_PRINT_AREA, sheetIndex+1); + getWorkbook().removeBuiltinRecord(NameRecord.BUILTIN_PRINT_AREA, sheetIndex+1); } /** creates a new named range and add it to the model @@ -1215,7 +1234,7 @@ public class HSSFWorkbook extends POIDocument /** gets the named range index by his name * Note:Excel named ranges are case-insensitive and * this method performs a case-insensitive search. - * + * * @param name named range name * @return named range index */ @@ -1252,9 +1271,9 @@ public class HSSFWorkbook extends POIDocument * @see org.apache.poi.hssf.record.Record */ public HSSFDataFormat createDataFormat() { - if (formatter == null) - formatter = new HSSFDataFormat(workbook); - return formatter; + if (formatter == null) + formatter = new HSSFDataFormat(workbook); + return formatter; } /** remove the named range by his name @@ -1433,9 +1452,9 @@ public class HSSFWorkbook extends POIDocument * Is the workbook protected with a password (not encrypted)? */ public boolean isWriteProtected() { - return this.workbook.isWriteProtected(); + return this.workbook.isWriteProtected(); } - + /** * protect a workbook with a password (not encypted, just sets writeprotect * flags and the password.