diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 1fee98e86..99106b3a0 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -50,6 +50,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel 45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found 45380 - Missing return keyword in ArrayPtg.toFormulaString() 44958 - Record level support for Data Tables. (No formula parser support though) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index e6472d6e9..96ff195f2 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -47,6 +47,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel 45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found 45380 - Missing return keyword in ArrayPtg.toFormulaString() 44958 - Record level support for Data Tables. (No formula parser support though) diff --git a/src/java/org/apache/poi/hssf/model/Workbook.java b/src/java/org/apache/poi/hssf/model/Workbook.java index d56051445..36510eb83 100644 --- a/src/java/org/apache/poi/hssf/model/Workbook.java +++ b/src/java/org/apache/poi/hssf/model/Workbook.java @@ -443,6 +443,17 @@ public class Workbook implements Model numfonts++; return rec; } + + /** + * Removes the given font record from the + * file's list. This will make all + * subsequent font indices drop by one, + * so you'll need to update those yourself! 
+ */ + public void removeFontRecord(FontRecord rec) { + records.remove(rec); // this updates FontPos for us + numfonts--; + } /** * gets the number of font records @@ -702,6 +713,18 @@ public class Workbook implements Model return retval; } + + /** + * Removes the given ExtendedFormatRecord record from the + * file's list. This will make all + * subsequent extended format (XF) indices drop by one, + * so you'll need to update those yourself! + */ + public void removeExFormatRecord(ExtendedFormatRecord rec) { + records.remove(rec); // this updates XfPos for us + numxfs--; + } + /** * creates a new Cell-type Extneded Format Record and adds it to the end of diff --git a/src/java/org/apache/poi/hssf/record/FontRecord.java b/src/java/org/apache/poi/hssf/record/FontRecord.java index d6a5ce859..11ba3aaa8 100644 --- a/src/java/org/apache/poi/hssf/record/FontRecord.java +++ b/src/java/org/apache/poi/hssf/record/FontRecord.java @@ -579,6 +579,31 @@ public class FontRecord result = prime * result + field_10_font_name_len; return result; } + + /** + * Does this FontRecord have all the same font + * properties as the supplied FontRecord? + * Note that {@link #equals(Object)} will check + * for exact objects, while this will check + * for exact contents, because normally the + * font record's position makes a big + * difference too. 
+ */ + public boolean sameProperties(FontRecord other) { + return + field_1_font_height == other.field_1_font_height && + field_2_attributes == other.field_2_attributes && + field_3_color_palette_index == other.field_3_color_palette_index && + field_4_bold_weight == other.field_4_bold_weight && + field_5_super_sub_script == other.field_5_super_sub_script && + field_6_underline == other.field_6_underline && + field_7_family == other.field_7_family && + field_8_charset == other.field_8_charset && + field_9_zero == other.field_9_zero && + field_10_font_name_len == other.field_10_font_name_len && + field_11_font_name.equals(other.field_11_font_name) + ; + } /** * Only returns two for the same exact object - diff --git a/src/java/org/apache/poi/hssf/record/UnicodeString.java b/src/java/org/apache/poi/hssf/record/UnicodeString.java index b53fcd485..9919d52c3 100644 --- a/src/java/org/apache/poi/hssf/record/UnicodeString.java +++ b/src/java/org/apache/poi/hssf/record/UnicodeString.java @@ -439,6 +439,23 @@ public class UnicodeString this.field_5_ext_rst = ext_rst; } + + /** + * Swaps all use in the string of one font index + * for use of a different font index. + * Normally only called when fonts have been + * removed / re-ordered + */ + public void swapFontUse(short oldFontIndex, short newFontIndex) { + Iterator i = field_4_format_runs.iterator(); + while(i.hasNext()) { + FormatRun run = (FormatRun)i.next(); + if(run.fontIndex == oldFontIndex) { + run.fontIndex = newFontIndex; + } + } + } + /** * unlike the real records we return the same as "getString()" rather than debug info * @see #getDebugInfo() diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java b/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java index c81929afe..e4a7e37d7 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java @@ -938,14 +938,13 @@ public class HSSFCell implements Cell * object. 
* @see org.apache.poi.hssf.usermodel.HSSFWorkbook#getCellStyleAt(short) */ - public HSSFCellStyle getCellStyle() { short styleIndex=record.getXFIndex(); ExtendedFormatRecord xf = book.getWorkbook().getExFormatAt(styleIndex); return new HSSFCellStyle(styleIndex, xf, book); } - + /** * used for internationalization, currently -1 for unchanged, 0 for compressed unicode or 1 for 16-bit * diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java new file mode 100644 index 000000000..f2c7acc41 --- /dev/null +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java @@ -0,0 +1,261 @@ +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.usermodel; + +import java.util.HashSet; +import java.util.Iterator; + +import org.apache.poi.hssf.record.ExtendedFormatRecord; +import org.apache.poi.hssf.record.FontRecord; +import org.apache.poi.hssf.record.UnicodeString; + +/** + * Excel can get cranky if you give it files containing too + * many (especially duplicate) objects, and this class can + * help to avoid those. 
+ * In general, it's much better to make sure you don't + * duplicate the objects in your code, as this is likely + * to be much faster than creating lots and lots of + * excel objects+records, only to optimise them down to + * many fewer at a later stage. + * However, sometimes this is too hard / tricky to do, which + * is where the use of this class comes in. + */ +public class HSSFOptimiser { + /** + * Goes through the Workbook, optimising the fonts by + * removing duplicate ones. + * For now, only works on fonts used in {@link HSSFCellStyle} + * and {@link HSSFRichTextString}. Any other font uses + * (eg charts, pictures) may well end up broken! + * This can be a slow operation, especially if you have + * lots of cells, cell styles or rich text strings + * @param workbook The workbook in which to optimise the fonts + */ + public static void optimiseFonts(HSSFWorkbook workbook) { + // Where each font has ended up, and if we need to + // delete the record for it. Start off with no change + short[] newPos = + new short[workbook.getWorkbook().getNumberOfFontRecords()+1]; + boolean[] zapRecords = new boolean[newPos.length]; + for(int i=0; i 21 + assertEquals(21, r.getCell(0).getCellValueRecord().getXFIndex()); + // cs2 -> 22 + assertEquals(22, r.getCell(1).getCellValueRecord().getXFIndex()); + // cs3 = cs1 -> 21 + assertEquals(21, r.getCell(2).getCellValueRecord().getXFIndex()); + // cs4 --> 24 -> 23 + assertEquals(23, r.getCell(3).getCellValueRecord().getXFIndex()); + // cs5 --> 25 -> 24 + assertEquals(24, r.getCell(4).getCellValueRecord().getXFIndex()); + // cs6 = cs2 -> 22 + assertEquals(22, r.getCell(5).getCellValueRecord().getXFIndex()); + // cs1 -> 21 + assertEquals(21, r.getCell(6).getCellValueRecord().getXFIndex()); + // cs2 -> 22 + assertEquals(22, r.getCell(7).getCellValueRecord().getXFIndex()); + } +}