From e09aa6d36357f10a22b4a0a342895637a0621e0d Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Tue, 15 Jul 2008 23:40:09 +0000 Subject: [PATCH] Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985
,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-660255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-674890,674892-674910,674912-674936,674938-674952,674954-675078,675080-675085,675087-675217,675219-675660,675662-675670,675672-675716,675718-675726,675728-675733,675735-675775,675777-675782,675784,675786-675791,675794-675852,675854-676200,676202,676204,676206-676220,676222-676309,676311-676456,676458-676994,676996-677064 via svnmerge from https://svn.apache.org:443/repos/asf/poi/trunk ........ r677028 | nick | 2008-07-15 21:19:06 +0100 (Tue, 15 Jul 2008) | 1 line Method to check if two fonts have the same contents ........ r677029 | nick | 2008-07-15 21:24:53 +0100 (Tue, 15 Jul 2008) | 1 line Support for removing low level font records ........ r677041 | nick | 2008-07-15 22:15:16 +0100 (Tue, 15 Jul 2008) | 1 line Start on HSSFOptimiser, which removes un-needed cell styles and fonts, fixing up references as it does so ........ r677057 | nick | 2008-07-15 22:38:38 +0100 (Tue, 15 Jul 2008) | 1 line Cell Style optimisations too ........ 
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@677103 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../org/apache/poi/hssf/model/Workbook.java | 23 ++ .../apache/poi/hssf/record/FontRecord.java | 25 ++ .../apache/poi/hssf/record/UnicodeString.java | 17 ++ .../apache/poi/hssf/usermodel/HSSFCell.java | 3 +- .../poi/hssf/usermodel/HSSFOptimiser.java | 261 ++++++++++++++++++ .../hssf/usermodel/HSSFRichTextString.java | 19 +- .../poi/hssf/usermodel/HSSFWorkbook.java | 10 + .../apache/poi/hssf/model/TestWorkbook.java | 33 +++ .../poi/hssf/record/TestFontRecord.java | 17 ++ .../poi/hssf/usermodel/AllUserModelTests.java | 1 + .../poi/hssf/usermodel/TestHSSFOptimiser.java | 240 ++++++++++++++++ 13 files changed, 647 insertions(+), 4 deletions(-) create mode 100644 src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java create mode 100644 src/testcases/org/apache/poi/hssf/usermodel/TestHSSFOptimiser.java diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 1fee98e86..99106b3a0 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -50,6 +50,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel 45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found 45380 - Missing return keyword in ArrayPtg.toFormulaString() 44958 - Record level support for Data Tables. 
(No formula parser support though) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index e6472d6e9..96ff195f2 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -47,6 +47,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel 45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found 45380 - Missing return keyword in ArrayPtg.toFormulaString() 44958 - Record level support for Data Tables. (No formula parser support though) diff --git a/src/java/org/apache/poi/hssf/model/Workbook.java b/src/java/org/apache/poi/hssf/model/Workbook.java index d56051445..36510eb83 100644 --- a/src/java/org/apache/poi/hssf/model/Workbook.java +++ b/src/java/org/apache/poi/hssf/model/Workbook.java @@ -443,6 +443,17 @@ public class Workbook implements Model numfonts++; return rec; } + + /** + * Removes the given font record from the + * file's list. This will make all + * subsequent font indices drop by one, + * so you'll need to update those yourself! + */ + public void removeFontRecord(FontRecord rec) { + records.remove(rec); // this updates FontPos for us + numfonts--; + } /** * gets the number of font records @@ -702,6 +713,18 @@ public class Workbook implements Model return retval; } + + /** + * Removes the given ExtendedFormatRecord record from the + * file's list. This will make all + * subsequent extended format indices drop by one, + * so you'll need to update those yourself! 
+ */ + public void removeExFormatRecord(ExtendedFormatRecord rec) { + records.remove(rec); // this updates XfPos for us + numxfs--; + } + /** * creates a new Cell-type Extneded Format Record and adds it to the end of diff --git a/src/java/org/apache/poi/hssf/record/FontRecord.java b/src/java/org/apache/poi/hssf/record/FontRecord.java index d6a5ce859..11ba3aaa8 100644 --- a/src/java/org/apache/poi/hssf/record/FontRecord.java +++ b/src/java/org/apache/poi/hssf/record/FontRecord.java @@ -579,6 +579,31 @@ public class FontRecord result = prime * result + field_10_font_name_len; return result; } + + /** + * Does this FontRecord have all the same font + * properties as the supplied FontRecord? + * Note that {@link #equals(Object)} will check + * for exact objects, while this will check + * for exact contents, because normally the + * font record's position makes a big + * difference too. + */ + public boolean sameProperties(FontRecord other) { + return + field_1_font_height == other.field_1_font_height && + field_2_attributes == other.field_2_attributes && + field_3_color_palette_index == other.field_3_color_palette_index && + field_4_bold_weight == other.field_4_bold_weight && + field_5_super_sub_script == other.field_5_super_sub_script && + field_6_underline == other.field_6_underline && + field_7_family == other.field_7_family && + field_8_charset == other.field_8_charset && + field_9_zero == other.field_9_zero && + field_10_font_name_len == other.field_10_font_name_len && + field_11_font_name.equals(other.field_11_font_name) + ; + } /** * Only returns two for the same exact object - diff --git a/src/java/org/apache/poi/hssf/record/UnicodeString.java b/src/java/org/apache/poi/hssf/record/UnicodeString.java index b53fcd485..9919d52c3 100644 --- a/src/java/org/apache/poi/hssf/record/UnicodeString.java +++ b/src/java/org/apache/poi/hssf/record/UnicodeString.java @@ -439,6 +439,23 @@ public class UnicodeString this.field_5_ext_rst = ext_rst; } + + /** + * Swaps all use in 
the string of one font index + * for use of a different font index. + * Normally only called when fonts have been + * removed / re-ordered + */ + public void swapFontUse(short oldFontIndex, short newFontIndex) { + Iterator i = field_4_format_runs.iterator(); + while(i.hasNext()) { + FormatRun run = (FormatRun)i.next(); + if(run.fontIndex == oldFontIndex) { + run.fontIndex = newFontIndex; + } + } + } + /** * unlike the real records we return the same as "getString()" rather than debug info * @see #getDebugInfo() diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java b/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java index c81929afe..e4a7e37d7 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java @@ -938,14 +938,13 @@ public class HSSFCell implements Cell * object. * @see org.apache.poi.hssf.usermodel.HSSFWorkbook#getCellStyleAt(short) */ - public HSSFCellStyle getCellStyle() { short styleIndex=record.getXFIndex(); ExtendedFormatRecord xf = book.getWorkbook().getExFormatAt(styleIndex); return new HSSFCellStyle(styleIndex, xf, book); } - + /** * used for internationalization, currently -1 for unchanged, 0 for compressed unicode or 1 for 16-bit * diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java new file mode 100644 index 000000000..f2c7acc41 --- /dev/null +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFOptimiser.java @@ -0,0 +1,261 @@ +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.usermodel; + +import java.util.HashSet; +import java.util.Iterator; + +import org.apache.poi.hssf.record.ExtendedFormatRecord; +import org.apache.poi.hssf.record.FontRecord; +import org.apache.poi.hssf.record.UnicodeString; + +/** + * Excel can get cranky if you give it files containing too + * many (especially duplicate) objects, and this class can + * help to avoid those. + * In general, it's much better to make sure you don't + * duplicate the objects in your code, as this is likely + * to be much faster than creating lots and lots of + * excel objects+records, only to optimise them down to + * many fewer at a later stage. + * However, sometimes this is too hard / tricky to do, which + * is where the use of this class comes in. + */ +public class HSSFOptimiser { + /** + * Goes through the Workbook, optimising the fonts by + * removing duplicate ones. + * For now, only works on fonts used in {@link HSSFCellStyle} + * and {@link HSSFRichTextString}. Any other font uses + * (eg charts, pictures) may well end up broken! + * This can be a slow operation, especially if you have + * lots of cells, cell styles or rich text strings + * @param workbook The workbook in which to optimise the fonts + */ + public static void optimiseFonts(HSSFWorkbook workbook) { + // Where each font has ended up, and if we need to + // delete the record for it. 
Start off with no change + short[] newPos = + new short[workbook.getWorkbook().getNumberOfFontRecords()+1]; + boolean[] zapRecords = new boolean[newPos.length]; + for(int i=0; i 21 + assertEquals(21, r.getCell(0).getCellValueRecord().getXFIndex()); + // cs2 -> 22 + assertEquals(22, r.getCell(1).getCellValueRecord().getXFIndex()); + // cs3 = cs1 -> 21 + assertEquals(21, r.getCell(2).getCellValueRecord().getXFIndex()); + // cs4 --> 24 -> 23 + assertEquals(23, r.getCell(3).getCellValueRecord().getXFIndex()); + // cs5 --> 25 -> 24 + assertEquals(24, r.getCell(4).getCellValueRecord().getXFIndex()); + // cs6 = cs2 -> 22 + assertEquals(22, r.getCell(5).getCellValueRecord().getXFIndex()); + // cs1 -> 21 + assertEquals(21, r.getCell(6).getCellValueRecord().getXFIndex()); + // cs2 -> 22 + assertEquals(22, r.getCell(7).getCellValueRecord().getXFIndex()); + } +}