From e7266f81c9153a076626ffc63f0e0470cddef23f Mon Sep 17 00:00:00 2001 From: Tetsuya Kitahata Date: Tue, 19 Aug 2003 14:07:40 +0000 Subject: [PATCH] PR:18846 Obtained from: Submitted by: kamoshida.toshiaki@future.co.jp (Toshiaki Kamoshida) Reviewed by: git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353304 13f79535-47bb-0310-9956-ffa450edef68 --- .../content/xdocs/dtd/changes-v11.dtd | 2 +- .../poi/hssf/record/BoundSheetRecord.java | 4 +- .../apache/poi/hssf/record/FontRecord.java | 4 +- .../apache/poi/hssf/record/FormatRecord.java | 4 +- .../apache/poi/hssf/record/LabelRecord.java | 2 +- .../poi/hssf/record/SeriesTextRecord.java | 4 +- .../apache/poi/hssf/record/StringRecord.java | 4 +- .../apache/poi/hssf/record/StyleRecord.java | 2 +- .../apache/poi/hssf/record/UnicodeString.java | 4 +- .../poi/hssf/record/formula/StringPtg.java | 4 +- src/java/org/apache/poi/util/StringUtil.java | 645 +++++++++--------- .../org/apache/poi/util/TestStringUtil.java | 24 +- 12 files changed, 343 insertions(+), 360 deletions(-) diff --git a/src/documentation/content/xdocs/dtd/changes-v11.dtd b/src/documentation/content/xdocs/dtd/changes-v11.dtd index e753a9bb1..2ec8c5012 100644 --- a/src/documentation/content/xdocs/dtd/changes-v11.dtd +++ b/src/documentation/content/xdocs/dtd/changes-v11.dtd @@ -62,7 +62,7 @@ COPYRIGHT: - + diff --git a/src/java/org/apache/poi/hssf/record/BoundSheetRecord.java b/src/java/org/apache/poi/hssf/record/BoundSheetRecord.java index 6e01246a6..ee3922c7a 100644 --- a/src/java/org/apache/poi/hssf/record/BoundSheetRecord.java +++ b/src/java/org/apache/poi/hssf/record/BoundSheetRecord.java @@ -138,7 +138,7 @@ public class BoundSheetRecord int nameLength = LittleEndian.ubyteToInt( field_3_sheetname_length ); if ( ( field_4_compressed_unicode_flag & 0x01 ) == 1 ) { - field_5_sheetname = StringUtil.getFromUnicodeHigh( data, 8 + offset, nameLength ); + field_5_sheetname = StringUtil.getFromUnicodeLE( data, 8 + offset, nameLength ); } else { @@ -300,7 +300,7 @@ public class BoundSheetRecord data[11 + offset] = getCompressedUnicodeFlag(); if ( ( field_4_compressed_unicode_flag & 0x01 ) == 1 ) - StringUtil.putUncompressedUnicode( getSheetname(), data, 12 + offset ); + StringUtil.putUnicodeLE( getSheetname(), data, 12 + offset ); else StringUtil.putCompressedUnicode( getSheetname(), data, 12 + offset ); diff --git a/src/java/org/apache/poi/hssf/record/FontRecord.java b/src/java/org/apache/poi/hssf/record/FontRecord.java index e010122cb..1a265d5b4 100644 --- a/src/java/org/apache/poi/hssf/record/FontRecord.java +++ b/src/java/org/apache/poi/hssf/record/FontRecord.java @@ -169,7 +169,7 @@ public class FontRecord } else { // is not compressed unicode - field_11_font_name = StringUtil.getFromUnicodeHigh(data, 16, + field_11_font_name = StringUtil.getFromUnicodeLE(data, 16, field_10_font_name_len); } } @@ -579,7 +579,7 @@ public class FontRecord data[ 18 + offset ] = getFontNameLength(); data[ 19 + offset ] = ( byte ) 1; if (getFontName() != null) { - StringUtil.putUncompressedUnicode(getFontName(), data, 20 + offset); + StringUtil.putUnicodeLE(getFontName(), data, 20 + offset); } return getRecordSize(); } diff --git a/src/java/org/apache/poi/hssf/record/FormatRecord.java b/src/java/org/apache/poi/hssf/record/FormatRecord.java index 241967a55..c5d6b9931 100644 --- a/src/java/org/apache/poi/hssf/record/FormatRecord.java +++ b/src/java/org/apache/poi/hssf/record/FormatRecord.java @@ -128,7 +128,7 @@ public class FormatRecord if ( field_3_unicode_flag ) { // unicode - field_4_formatstring = StringUtil.getFromUnicodeHigh( data, 5 + offset, field_3_unicode_len ); + field_4_formatstring = StringUtil.getFromUnicodeLE( data, 5 + offset, field_3_unicode_len ); } else { // not unicode @@ -264,7 +264,7 @@ public class FormatRecord if ( field_3_unicode_flag ) { // unicode - StringUtil.putUncompressedUnicode( getFormatString(), data, 9 + offset ); + StringUtil.putUnicodeLE( getFormatString(), data, 9 + offset ); } else { // not unicode diff --git a/src/java/org/apache/poi/hssf/record/LabelRecord.java b/src/java/org/apache/poi/hssf/record/LabelRecord.java index 558584ba9..950a7cc09 100644 --- a/src/java/org/apache/poi/hssf/record/LabelRecord.java +++ b/src/java/org/apache/poi/hssf/record/LabelRecord.java @@ -152,7 +152,7 @@ public class LabelRecord field_5_unicode_flag = data[ 8 + offset ]; if (isUnCompressedUnicode()) { - field_6_value = StringUtil.getFromUnicode(data, 8 + offset, + field_6_value = StringUtil.getFromUnicodeBE(data, 8 + offset, field_4_string_len); } else diff --git a/src/java/org/apache/poi/hssf/record/SeriesTextRecord.java b/src/java/org/apache/poi/hssf/record/SeriesTextRecord.java index af3fdb7b2..c5054e0f3 100644 --- a/src/java/org/apache/poi/hssf/record/SeriesTextRecord.java +++ b/src/java/org/apache/poi/hssf/record/SeriesTextRecord.java @@ -133,7 +133,7 @@ public class SeriesTextRecord field_1_id = LittleEndian.getShort(data, pos + 0x0 + offset); field_2_textLength = data[ pos + 0x2 + offset ]; field_3_undocumented = data[ pos + 0x3 + offset ]; - field_4_text = StringUtil.getFromUnicodeHigh(data, pos + 0x4 + offset, ((field_2_textLength *2)/2)); + field_4_text = StringUtil.getFromUnicodeLE(data, pos + 0x4 + offset, ((field_2_textLength *2)/2)); } @@ -172,7 +172,7 @@ public class SeriesTextRecord LittleEndian.putShort(data, 4 + offset + pos, field_1_id); data[ 6 + offset + pos ] = field_2_textLength; data[ 7 + offset + pos ] = field_3_undocumented; - StringUtil.putUncompressedUnicodeHigh(field_4_text, data, 8 + offset + pos); + StringUtil.putUnicodeLE(field_4_text, data, 8 + offset + pos); return getRecordSize(); } diff --git a/src/java/org/apache/poi/hssf/record/StringRecord.java b/src/java/org/apache/poi/hssf/record/StringRecord.java index ed157bb52..4e28cad6d 100644 --- a/src/java/org/apache/poi/hssf/record/StringRecord.java +++ b/src/java/org/apache/poi/hssf/record/StringRecord.java @@ -130,7 +130,7 @@ public class StringRecord field_2_unicode_flag = data[ 2 + offset ]; if (isUnCompressedUnicode()) { - field_3_string = StringUtil.getFromUnicode(data, 3 + offset, field_1_string_length ); + field_3_string = StringUtil.getFromUnicodeBE(data, 3 + offset, field_1_string_length ); } else { @@ -182,7 +182,7 @@ public class StringRecord data[6 + offset] = field_2_unicode_flag; if (isUnCompressedUnicode()) { - StringUtil.putUncompressedUnicode(field_3_string, data, 7 + offset); + StringUtil.putUnicodeLE(field_3_string, data, 7 + offset); } else { diff --git a/src/java/org/apache/poi/hssf/record/StyleRecord.java b/src/java/org/apache/poi/hssf/record/StyleRecord.java index d6bb482e5..efefbb0e9 100644 --- a/src/java/org/apache/poi/hssf/record/StyleRecord.java +++ b/src/java/org/apache/poi/hssf/record/StyleRecord.java @@ -143,7 +143,7 @@ public class StyleRecord field_3_string_options = data[4+offset]; if (fHighByte.isSet(field_3_string_options)) { - field_4_name= StringUtil.getFromUnicode(data,offset+5,field_2_name_length); + field_4_name= StringUtil.getFromUnicodeBE(data,offset+5,field_2_name_length); }else { field_4_name=StringUtil.getFromCompressedUnicode(data,offset+5,field_2_name_length); } diff --git a/src/java/org/apache/poi/hssf/record/UnicodeString.java b/src/java/org/apache/poi/hssf/record/UnicodeString.java index ad97d25bf..ba1479ea5 100644 --- a/src/java/org/apache/poi/hssf/record/UnicodeString.java +++ b/src/java/org/apache/poi/hssf/record/UnicodeString.java @@ -328,7 +328,7 @@ offset); } else { - StringUtil.putUncompressedUnicode(unicodeString, data, + StringUtil.putUnicodeLE(unicodeString, data, 0x3 + offset); } } @@ -340,7 +340,7 @@ offset); } else { - StringUtil.putUncompressedUnicode(getString(), data, + StringUtil.putUnicodeLE(getString(), data, 0x3 + offset); } } diff --git a/src/java/org/apache/poi/hssf/record/formula/StringPtg.java b/src/java/org/apache/poi/hssf/record/formula/StringPtg.java index a66861914..5bccc034f 100644 --- a/src/java/org/apache/poi/hssf/record/formula/StringPtg.java +++ b/src/java/org/apache/poi/hssf/record/formula/StringPtg.java @@ -89,7 +89,7 @@ public class StringPtg field_1_length = data[offset]; field_2_options = data[offset+1]; if (fHighByte.isSet(field_2_options)) { - field_3_string= StringUtil.getFromUnicode(data,offset+2,field_1_length); + field_3_string= StringUtil.getFromUnicodeBE(data,offset+2,field_1_length); }else { field_3_string=StringUtil.getFromCompressedUnicode(data,offset+2,field_1_length); } @@ -130,7 +130,7 @@ public class StringPtg array[ offset + 1 ] = field_1_length; array[ offset + 2 ] = field_2_options; if (fHighByte.isSet(field_2_options)) { - StringUtil.putUncompressedUnicode(getValue(),array,offset+3); + StringUtil.putUnicodeLE(getValue(),array,offset+3); }else { StringUtil.putCompressedUnicode(getValue(),array,offset+3); } diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java index 1a3947547..910ee9781 100644 --- a/src/java/org/apache/poi/util/StringUtil.java +++ b/src/java/org/apache/poi/util/StringUtil.java @@ -1,363 +1,346 @@ /* - * ==================================================================== - * The Apache Software License, Version 1.1 + * ==================================================================== + * The Apache Software License, Version 1.1 * - * Copyright (c) 2002 The Apache Software Foundation. All rights - * reserved. + * Copyright (c) 2003 The Apache Software Foundation. All rights + * reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache POI" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache POI" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. * - * 5. Products derived from this software may not be called "Apache", - * "Apache POI", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. + * 5. Products derived from this software may not be called "Apache", + * "Apache POI", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . */ package org.apache.poi.util; import java.io.UnsupportedEncodingException; - import java.text.NumberFormat; import java.text.FieldPosition; - -/** - * Title: String Utility Description: Collection of string handling utilities +import java.util.Arrays; +/** + * Title: String Utility Description: Collection of string handling utilities + * * - * Now it is quite confusing: the method pairs, in which - * one of them write data and other read written data are: - * putUncompressedUnicodeHigh and getFromUnicode - * putUncompressedUnicode and getFromUnicodeHigh - * - *@author Andrew C. Oliver - *@author Sergei Kozello (sergeikozello at mail.ru) - *@created May 10, 2002 - *@version 1.0 + *@author Andrew C. Oliver + *@author Sergei Kozello (sergeikozello at mail.ru) + *@author Toshiaki Kamoshida (kamoshida.toshiaki at future dot co dot jp) + *@created May 10, 2002 + *@version 1.0 */ - public class StringUtil { - - private final static String ENCODING="ISO-8859-1"; - /** - * Constructor for the StringUtil object - */ - private StringUtil() { } + private final static String ENCODING = "ISO-8859-1"; + /** + * Constructor for the StringUtil object + */ + private StringUtil() { + } - - /** - * given a byte array of 16-bit unicode characters, compress to 8-bit and - * return a string - * - * { 0x16, 0x00 } -> 0x16 - * - *@param string the byte array to be converted - *@param offset the initial offset into the - * byte array. it is assumed that string[ offset ] and string[ offset + - * 1 ] contain the first 16-bit unicode character - *@param len - *@return the converted string - *@exception ArrayIndexOutOfBoundsException if offset is out of bounds for - * the byte array (i.e., is negative or is greater than or equal to - * string.length) - *@exception IllegalArgumentException if len is too large (i.e., - * there is not enough data in string to create a String of that - * length) - *@len the length of the final string - */ + /** + * given a byte array of 16-bit unicode characters, compress to 8-bit and + * return a string + * + * { 0x16, 0x00 } -0x16 + * + *@param string the byte array to be converted + *@param offset the initial offset into the + * byte array. it is assumed that string[ offset ] and string[ offset + + * 1 ] contain the first 16-bit unicode character + *@param len + *@return the converted string + *@exception ArrayIndexOutOfBoundsException if offset is out of bounds for + * the byte array (i.e., is negative or is greater than or equal to + * string.length) + *@exception IllegalArgumentException if len is too large (i.e., + * there is not enough data in string to create a String of that + * length) + *@len the length of the final string + */ + public static String getFromUnicodeLE( + final byte[] string, + final int offset, + final int len) + throws ArrayIndexOutOfBoundsException, IllegalArgumentException { + if ((offset < 0) || (offset >= string.length)) { + throw new ArrayIndexOutOfBoundsException("Illegal offset"); + } + if ((len < 0) || (((string.length - offset) / 2) < len)) { + throw new IllegalArgumentException("Illegal length"); + } - public static String getFromUnicodeHigh(final byte[] string, - final int offset, final int len) - throws ArrayIndexOutOfBoundsException, IllegalArgumentException { + try { + return new String(string, offset, len * 2, "UTF-16LE"); + } catch (UnsupportedEncodingException e) { + throw new InternalError(); /*unreachable*/ + } + } - if ((offset < 0) || (offset >= string.length)) { - throw new ArrayIndexOutOfBoundsException("Illegal offset"); - } - if ((len < 0) || (((string.length - offset) / 2) < len)) { - throw new IllegalArgumentException("Illegal length"); - } - - char[] chars = new char[ len ]; - for ( int i = 0; i < chars.length; i++ ) { - chars[i] = (char)( string[ offset + ( 2*i ) ] & 0xFF | - ( string[ offset + ( 2*i+1 ) ] << 8 ) ); - } + /** + * given a byte array of 16-bit unicode characters, compress to 8-bit and + * return a string + * + * { 0x16, 0x00 } -0x16 + * + *@param string the byte array to be converted + *@return the converted string + */ + public static String getFromUnicodeLE(final byte[] string) { + return getFromUnicodeLE(string, 0, string.length / 2); + } - return new String( chars ); - } - - - /** - * given a byte array of 16-bit unicode characters, compress to 8-bit and - * return a string - * - * { 0x16, 0x00 } -> 0x16 - * - *@param string the byte array to be converted - *@return the converted string - */ + /** + * given a byte array of 16-bit unicode characters, compress to 8-bit and + * return a string + * + * { 0x00, 0x16 } -0x16 + * + *@param string the byte array to be converted + **@param offset the initial offset into the + * byte array. it is assumed that string[ offset ] and string[ offset + + * 1 ] contain the first 16-bit unicode character + *@param len + *@return the converted string + *@exception ArrayIndexOutOfBoundsException if offset is out of bounds for + * the byte array (i.e., is negative or is greater than or equal to + * string.length) + *@exception IllegalArgumentException if len is too large (i.e., + * there is not enough data in string to create a String of that + * length) + *@len the length of the final string + */ + public static String getFromUnicodeBE( + final byte[] string, + final int offset, + final int len) + throws ArrayIndexOutOfBoundsException, IllegalArgumentException { + if ((offset < 0) || (offset >= string.length)) { + throw new ArrayIndexOutOfBoundsException("Illegal offset"); + } + if ((len < 0) || (((string.length - offset) / 2) < len)) { + throw new IllegalArgumentException("Illegal length"); + } + try { + return new String(string, offset, len * 2, "UTF-16BE"); + } catch (UnsupportedEncodingException e) { + throw new InternalError(); /*unreachable*/ + } + } - public static String getFromUnicodeHigh( final byte[] string ) { - return getFromUnicodeHigh( string, 0, string.length / 2 ); - } + /** + * given a byte array of 16-bit unicode characters, compress to 8-bit and + * return a string + * + * { 0x00, 0x16 } -0x16 + * + *@param string the byte array to be converted + *@return the converted string + */ + public static String getFromUnicodeBE(final byte[] string) { + return getFromUnicodeBE(string, 0, string.length / 2); + } + /** + * read compressed unicode(8bit) + * + * @param string byte array to read + * @param offset offset to read byte array + * @param len length to read byte array + * @return String generated String instance by reading byte array + */ + public static String getFromCompressedUnicode( + final byte[] string, + final int offset, + final int len) { + try { + return new String(string, offset, len, "ISO-8859-1"); + } catch (UnsupportedEncodingException e) { + throw new InternalError(); /* unreachable */ + } + } - /** - * given a byte array of 16-bit unicode characters, compress to 8-bit and - * return a string - * - * { 0x00, 0x16 } -> 0x16 - * - *@param string the byte array to be converted - *@param offset the initial offset into the - * byte array. it is assumed that string[ offset ] and string[ offset + - * 1 ] contain the first 16-bit unicode character - *@param len - *@return the converted string - *@exception ArrayIndexOutOfBoundsException if offset is out of bounds for - * the byte array (i.e., is negative or is greater than or equal to - * string.length) - *@exception IllegalArgumentException if len is too large (i.e., - * there is not enough data in string to create a String of that - * length) - *@len the length of the final string - */ + /** + * write compressed unicode + * + *@param input the String containing the data to be written + *@param output the byte array to which the data is to be written + *@param offset an offset into the byte arrat at which the data is start + * when written + */ + public static void putCompressedUnicode( + final String input, + final byte[] output, + final int offset) { + try { + byte[] bytes = input.getBytes("ISO-8859-1"); + System.arraycopy(bytes, 0, output, offset, bytes.length); + } catch (UnsupportedEncodingException e) { + throw new InternalError(); /*unreachable*/ + } + } - public static String getFromUnicode(final byte[] string, - final int offset, final int len) - throws ArrayIndexOutOfBoundsException, IllegalArgumentException { - if ((offset < 0) || (offset >= string.length)) { - throw new ArrayIndexOutOfBoundsException("Illegal offset"); - } - if ((len < 0) || (((string.length - offset) / 2) < len)) { - throw new IllegalArgumentException("Illegal length"); - } + /** + * Write uncompressed unicode + * + *@param input the String containing the unicode data to be written + *@param output the byte array to hold the uncompressed unicode + *@param offset the offset to start writing into the byte array + */ + public static void putUnicodeLE( + final String input, + final byte[] output, + final int offset) { + try { + byte[] bytes = input.getBytes("UTF-16LE"); + System.arraycopy(bytes, 0, output, offset, bytes.length); + } catch (UnsupportedEncodingException e) { + throw new InternalError(); /*unreachable*/ + } + } - - char[] chars = new char[ len ]; - for ( int i = 0; i < chars.length; i++ ) { - chars[i] = (char)( ( string[ offset + ( 2*i ) ] << 8 ) + - string[ offset + ( 2*i+1 ) ] ); - } - - return new String( chars ); - } + /** + * Write uncompressed unicode + * + *@param input the String containing the unicode data to be written + *@param output the byte array to hold the uncompressed unicode + *@param offset the offset to start writing into the byte array + */ + public static void putUnicodeBE( + final String input, + final byte[] output, + final int offset) { + try { + byte[] bytes = input.getBytes("UTF-16BE"); + System.arraycopy(bytes, 0, output, offset, bytes.length); + } catch (UnsupportedEncodingException e) { + throw new InternalError(); /*unreachable*/ + } + } + /** + * Description of the Method + * + *@param message Description of the Parameter + *@param params Description of the Parameter + *@return Description of the Return Value + */ + public static String format(String message, Object[] params) { + int currentParamNumber = 0; + StringBuffer formattedMessage = new StringBuffer(); + for (int i = 0; i < message.length(); i++) { + if (message.charAt(i) == '%') { + if (currentParamNumber >= params.length) { + formattedMessage.append("?missing data?"); + } else if ( + (params[currentParamNumber] instanceof Number) + && (i + 1 < message.length())) { + i + += matchOptionalFormatting( + (Number) params[currentParamNumber++], + message.substring(i + 1), + formattedMessage); + } else { + formattedMessage.append( + params[currentParamNumber++].toString()); + } + } else { + if ((message.charAt(i) == '\\') + && (i + 1 < message.length()) + && (message.charAt(i + 1) == '%')) { + formattedMessage.append('%'); + i++; + } else { + formattedMessage.append(message.charAt(i)); + } + } + } + return formattedMessage.toString(); + } - /** - * given a byte array of 16-bit unicode characters, compress to 8-bit and - * return a string - * - * { 0x00, 0x16 } -> 0x16 - * - *@param string the byte array to be converted - *@return the converted string - */ + /** + * Description of the Method + * + *@param number Description of the Parameter + *@param formatting Description of the Parameter + *@param outputTo Description of the Parameter + *@return Description of the Return Value + */ + private static int matchOptionalFormatting( + Number number, + String formatting, + StringBuffer outputTo) { + NumberFormat numberFormat = NumberFormat.getInstance(); + if ((0 < formatting.length()) + && Character.isDigit(formatting.charAt(0))) { + numberFormat.setMinimumIntegerDigits( + Integer.parseInt(formatting.charAt(0) + "")); + if ((2 < formatting.length()) + && (formatting.charAt(1) == '.') + && Character.isDigit(formatting.charAt(2))) { + numberFormat.setMaximumFractionDigits( + Integer.parseInt(formatting.charAt(2) + "")); + numberFormat.format(number, outputTo, new FieldPosition(0)); + return 3; + } + numberFormat.format(number, outputTo, new FieldPosition(0)); + return 1; + } else if ( + (0 < formatting.length()) && (formatting.charAt(0) == '.')) { + if ((1 < formatting.length()) + && Character.isDigit(formatting.charAt(1))) { + numberFormat.setMaximumFractionDigits( + Integer.parseInt(formatting.charAt(1) + "")); + numberFormat.format(number, outputTo, new FieldPosition(0)); + return 2; + } + } + numberFormat.format(number, outputTo, new FieldPosition(0)); + return 1; + } - public static String getFromUnicode(final byte[] string) { - return getFromUnicode(string, 0, string.length / 2); - } - - - /** - * read compressed unicode(8bit) - * - * @author Toshiaki Kamoshida(kamoshida.toshiaki at future dot co dot jp) - * - * @param string byte array to read - * @param offset offset to read byte array - * @param len length to read byte array - * @return String generated String instance by reading byte array - */ - public static String getFromCompressedUnicode(final byte[] string, - final int offset, final int len){ - try{ - return new String(string,offset,len,"ISO-8859-1"); - } - catch(UnsupportedEncodingException e){ - throw new InternalError();/* unreachable */ - } - } - - /** - * write compressed unicode - * - *@param input the String containing the data to be written - *@param output the byte array to which the data is to be written - *@param offset an offset into the byte arrat at which the data is start - * when written - */ - - public static void putCompressedUnicode(final String input, - final byte[] output, - final int offset) { - int strlen = input.length(); - - for (int k = 0; k < strlen; k++) { - output[offset + k] = (byte) input.charAt(k); - } - } - - - /** - * Write uncompressed unicode - * - *@param input the String containing the unicode data to be written - *@param output the byte array to hold the uncompressed unicode - *@param offset the offset to start writing into the byte array - */ - - public static void putUncompressedUnicode(final String input, - final byte[] output, - final int offset) { - int strlen = input.length(); - - for (int k = 0; k < strlen; k++) { - char c = input.charAt(k); - - output[offset + (2 * k)] = (byte) c; - output[offset + (2 * k) + 1] = (byte) (c >> 8); - } - } - - /** - * Write uncompressed unicode - * - *@param input the String containing the unicode data to be written - *@param output the byte array to hold the uncompressed unicode - *@param offset the offset to start writing into the byte array - */ - - public static void putUncompressedUnicodeHigh(final String input, - final byte[] output, - final int offset) { - int strlen = input.length(); - - for (int k = 0; k < strlen; k++) { - char c = input.charAt(k); - - output[offset + (2 * k)] = (byte) (c >> 8); - output[offset + (2 * k)] = (byte) c; - } - } - - - - - /** - * Description of the Method - * - *@param message Description of the Parameter - *@param params Description of the Parameter - *@return Description of the Return Value - */ - public static String format(String message, Object[] params) { - int currentParamNumber = 0; - StringBuffer formattedMessage = new StringBuffer(); - - for (int i = 0; i < message.length(); i++) { - if (message.charAt(i) == '%') { - if (currentParamNumber >= params.length) { - formattedMessage.append("?missing data?"); - } else if ((params[currentParamNumber] instanceof Number) - && (i + 1 < message.length())) { - i += matchOptionalFormatting( - (Number) params[currentParamNumber++], - message.substring(i + 1), formattedMessage); - } else { - formattedMessage.append(params[currentParamNumber++].toString()); - } - } else { - if ((message.charAt(i) == '\\') && (i + 1 < message.length()) - && (message.charAt(i + 1) == '%')) { - formattedMessage.append('%'); - i++; - } else { - formattedMessage.append(message.charAt(i)); - } - } - } - return formattedMessage.toString(); - } - - - /** - * Description of the Method - * - *@param number Description of the Parameter - *@param formatting Description of the Parameter - *@param outputTo Description of the Parameter - *@return Description of the Return Value - */ - private static int matchOptionalFormatting(Number number, - String formatting, - StringBuffer outputTo) { - NumberFormat numberFormat = NumberFormat.getInstance(); - - if ((0 < formatting.length()) - && Character.isDigit(formatting.charAt(0))) { - numberFormat.setMinimumIntegerDigits(Integer.parseInt(formatting.charAt(0) + "")); - if ((2 < formatting.length()) && (formatting.charAt(1) == '.') - && Character.isDigit(formatting.charAt(2))) { - numberFormat.setMaximumFractionDigits(Integer.parseInt(formatting.charAt(2) + "")); - numberFormat.format(number, outputTo, new FieldPosition(0)); - return 3; - } - numberFormat.format(number, outputTo, new FieldPosition(0)); - return 1; - } else if ((0 < formatting.length()) && (formatting.charAt(0) == '.')) { - if ((1 < formatting.length()) - && Character.isDigit(formatting.charAt(1))) { - numberFormat.setMaximumFractionDigits(Integer.parseInt(formatting.charAt(1) + "")); - numberFormat.format(number, outputTo, new FieldPosition(0)); - return 2; - } - } - numberFormat.format(number, outputTo, new FieldPosition(0)); - return 1; - } - - /** - * @return the encoding we want to use (ISO-8859-1) - */ - public static String getPreferredEncoding() { - return ENCODING; - } + /** + * @return the encoding we want to use (ISO-8859-1) + */ + public static String getPreferredEncoding() { + return ENCODING; + } } diff --git a/src/testcases/org/apache/poi/util/TestStringUtil.java b/src/testcases/org/apache/poi/util/TestStringUtil.java index 6de440c13..2504fb521 100644 --- a/src/testcases/org/apache/poi/util/TestStringUtil.java +++ b/src/testcases/org/apache/poi/util/TestStringUtil.java @@ -92,7 +92,7 @@ public class TestStringUtil } assertEquals( "abcdefghijklmnop", - StringUtil.getFromUnicode( test_data ) ); + StringUtil.getFromUnicodeBE( test_data ) ); } /** @@ -112,7 +112,7 @@ public class TestStringUtil }; assertEquals( "\u0422\u0435\u0441\u0442 test", - StringUtil.getFromUnicode( test_data ) ); + StringUtil.getFromUnicodeBE( test_data ) ); } /** @@ -133,7 +133,7 @@ public class TestStringUtil assertEquals( "\u0422\u0435\u0441\u0442 test", - StringUtil.getFromUnicodeHigh( test_data ) ); + StringUtil.getFromUnicodeLE( test_data ) ); } /** @@ -149,12 +149,12 @@ public class TestStringUtil test_data[index++] = (byte) ( 'a' + k ); } assertEquals( "abcdefghijklmno", - StringUtil.getFromUnicode( test_data, 0, 15 ) ); + StringUtil.getFromUnicodeBE( test_data, 0, 15 ) ); assertEquals( "bcdefghijklmnop", - StringUtil.getFromUnicode( test_data, 2, 15 ) ); + StringUtil.getFromUnicodeBE( test_data, 2, 15 ) ); try { - StringUtil.getFromUnicode( test_data, -1, 16 ); + StringUtil.getFromUnicodeBE( test_data, -1, 16 ); fail( "Should have caught ArrayIndexOutOfBoundsException" ); } catch ( ArrayIndexOutOfBoundsException ignored ) @@ -164,7 +164,7 @@ public class TestStringUtil try { - StringUtil.getFromUnicode( test_data, 32, 16 ); + StringUtil.getFromUnicodeBE( test_data, 32, 16 ); fail( "Should have caught ArrayIndexOutOfBoundsException" ); } catch ( ArrayIndexOutOfBoundsException ignored ) @@ -174,7 +174,7 @@ public class TestStringUtil try { - StringUtil.getFromUnicode( test_data, 1, 16 ); + StringUtil.getFromUnicodeBE( test_data, 1, 16 ); fail( "Should have caught IllegalArgumentException" ); } catch ( IllegalArgumentException ignored ) @@ -184,7 +184,7 @@ public class TestStringUtil try { - StringUtil.getFromUnicode( test_data, 1, -1 ); + StringUtil.getFromUnicodeBE( test_data, 1, -1 ); fail( "Should have caught IllegalArgumentException" ); } catch ( IllegalArgumentException ignored ) @@ -248,13 +248,13 @@ public class TestStringUtil (byte) 'd', (byte) 0 }; - StringUtil.putUncompressedUnicode( input, output, 0 ); + StringUtil.putUnicodeLE( input, output, 0 ); for ( int j = 0; j < expected_output.length; j++ ) { assertEquals( "testing offset " + j, expected_output[j], output[j] ); } - StringUtil.putUncompressedUnicode( input, output, + StringUtil.putUnicodeLE( input, output, 100 - expected_output.length ); for ( int j = 0; j < expected_output.length; j++ ) { @@ -263,7 +263,7 @@ public class TestStringUtil } try { - StringUtil.putUncompressedUnicode( input, output, + StringUtil.putUnicodeLE( input, output, 101 - expected_output.length ); fail( "Should have caught ArrayIndexOutOfBoundsException" ); }