Rewinded: StringUtil.java: Sync to REL_2 BRANCH

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353249 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tetsuya Kitahata 2003-07-26 08:53:22 +00:00
parent b07e707db1
commit 9af1ac7f8a

View File

@ -1,346 +1,363 @@
/* /*
* ==================================================================== * ====================================================================
* The Apache Software License, Version 1.1 * The Apache Software License, Version 1.1
* *
* Copyright (c) 2003 The Apache Software Foundation. All rights * Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved. * reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
* are met: * are met:
* *
* 1. Redistributions of source code must retain the above copyright * 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer. * notice, this list of conditions and the following disclaimer.
* *
* 2. Redistributions in binary form must reproduce the above copyright * 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in * notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the * the documentation and/or other materials provided with the
* distribution. * distribution.
* *
* 3. The end-user documentation included with the redistribution, * 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment: * if any, must include the following acknowledgment:
* "This product includes software developed by the * "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)." * Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself, * Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear. * if and wherever such third-party acknowledgments normally appear.
* *
* 4. The names "Apache" and "Apache Software Foundation" and * 4. The names "Apache" and "Apache Software Foundation" and
* "Apache POI" must not be used to endorse or promote products * "Apache POI" must not be used to endorse or promote products
* derived from this software without prior written permission. For * derived from this software without prior written permission. For
* written permission, please contact apache@apache.org. * written permission, please contact apache@apache.org.
* *
* 5. Products derived from this software may not be called "Apache", * 5. Products derived from this software may not be called "Apache",
* "Apache POI", nor may "Apache" appear in their name, without * "Apache POI", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation. * prior written permission of the Apache Software Foundation.
* *
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. * SUCH DAMAGE.
* ==================================================================== * ====================================================================
* *
* This software consists of voluntary contributions made by many * This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more * individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see * information on the Apache Software Foundation, please see
* <http://www.apache.org/>. * <http://www.apache.org/>.
*/ */
package org.apache.poi.util; package org.apache.poi.util;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.text.NumberFormat; import java.text.NumberFormat;
import java.text.FieldPosition; import java.text.FieldPosition;
import java.util.Arrays;
/** /**
* Title: String Utility Description: Collection of string handling utilities * Title: String Utility Description: Collection of string handling utilities
*
* *
*@author Andrew C. Oliver * Now it is quite confusing: the method pairs, in which
*@author Sergei Kozello (sergeikozello at mail.ru) * one of them write data and other read written data are:
*@author Toshiaki Kamoshida (kamoshida.toshiaki at future dot co dot jp) * putUncompressedUnicodeHigh and getFromUnicode
*@created May 10, 2002 * putUncompressedUnicode and getFromUnicodeHigh
*@version 1.0 *
*@author Andrew C. Oliver
*@author Sergei Kozello (sergeikozello at mail.ru)
*@created May 10, 2002
*@version 1.0
*/ */
public class StringUtil { public class StringUtil {
private final static String ENCODING = "ISO-8859-1";
/** private final static String ENCODING="ISO-8859-1";
* Constructor for the StringUtil object /**
*/ * Constructor for the StringUtil object
private StringUtil() { */
} private StringUtil() { }
/**
* given a byte array of 16-bit unicode characters, compress to 8-bit and /**
* return a string * given a byte array of 16-bit unicode characters, compress to 8-bit and
* * return a string
* { 0x16, 0x00 } -0x16 *
* * { 0x16, 0x00 } -> 0x16
*@param string the byte array to be converted *
*@param offset the initial offset into the *@param string the byte array to be converted
* byte array. it is assumed that string[ offset ] and string[ offset + *@param offset the initial offset into the
* 1 ] contain the first 16-bit unicode character * byte array. it is assumed that string[ offset ] and string[ offset +
*@param len * 1 ] contain the first 16-bit unicode character
*@return the converted string *@param len
*@exception ArrayIndexOutOfBoundsException if offset is out of bounds for *@return the converted string
* the byte array (i.e., is negative or is greater than or equal to *@exception ArrayIndexOutOfBoundsException if offset is out of bounds for
* string.length) * the byte array (i.e., is negative or is greater than or equal to
*@exception IllegalArgumentException if len is too large (i.e., * string.length)
* there is not enough data in string to create a String of that *@exception IllegalArgumentException if len is too large (i.e.,
* length) * there is not enough data in string to create a String of that
*@len the length of the final string * length)
*/ *@len the length of the final string
public static String getFromUnicodeLE( */
final byte[] string,
final int offset,
final int len)
throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
if ((offset < 0) || (offset >= string.length)) {
throw new ArrayIndexOutOfBoundsException("Illegal offset");
}
if ((len < 0) || (((string.length - offset) / 2) < len)) {
throw new IllegalArgumentException("Illegal length");
}
try { public static String getFromUnicodeHigh(final byte[] string,
return new String(string, offset, len * 2, "UTF-16LE"); final int offset, final int len)
} catch (UnsupportedEncodingException e) { throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
throw new InternalError(); /*unreachable*/
}
}
/** if ((offset < 0) || (offset >= string.length)) {
* given a byte array of 16-bit unicode characters, compress to 8-bit and throw new ArrayIndexOutOfBoundsException("Illegal offset");
* return a string }
* if ((len < 0) || (((string.length - offset) / 2) < len)) {
* { 0x16, 0x00 } -0x16 throw new IllegalArgumentException("Illegal length");
* }
*@param string the byte array to be converted
*@return the converted string char[] chars = new char[ len ];
*/ for ( int i = 0; i < chars.length; i++ ) {
public static String getFromUnicodeLE(final byte[] string) { chars[i] = (char)( string[ offset + ( 2*i ) ] & 0xFF |
return getFromUnicodeLE(string, 0, string.length / 2); ( string[ offset + ( 2*i+1 ) ] << 8 ) );
} }
/** return new String( chars );
* given a byte array of 16-bit unicode characters, compress to 8-bit and }
* return a string
*
* { 0x00, 0x16 } -0x16 /**
* * given a byte array of 16-bit unicode characters, compress to 8-bit and
*@param string the byte array to be converted * return a string
**@param offset the initial offset into the *
* byte array. it is assumed that string[ offset ] and string[ offset + * { 0x16, 0x00 } -> 0x16
* 1 ] contain the first 16-bit unicode character *
*@param len *@param string the byte array to be converted
*@return the converted string *@return the converted string
*@exception ArrayIndexOutOfBoundsException if offset is out of bounds for */
* the byte array (i.e., is negative or is greater than or equal to
* string.length)
*@exception IllegalArgumentException if len is too large (i.e.,
* there is not enough data in string to create a String of that
* length)
*@len the length of the final string
*/
/**
 * Decodes len 16-bit characters, stored high byte first (UTF-16BE), from a
 * byte array.
 *
 * { 0x00, 0x16 } -> 0x16
 *
 * @param string the byte array to decode
 * @param offset index of the first byte of the first character
 * @param len number of characters (byte pairs) to decode
 * @return the decoded string
 * @exception ArrayIndexOutOfBoundsException if offset is negative or is
 *            greater than or equal to string.length
 * @exception IllegalArgumentException if len is negative or fewer than
 *            len byte pairs remain after offset
 */
public static String getFromUnicodeBE(
        final byte[] string,
        final int offset,
        final int len)
        throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
    // The offset has to address an existing byte of the array.
    final boolean offsetInRange = (offset >= 0) && (offset < string.length);
    if (!offsetInRange) {
        throw new ArrayIndexOutOfBoundsException("Illegal offset");
    }
    // Two bytes per character: count the complete pairs left after offset.
    final int availableChars = (string.length - offset) / 2;
    if ((len < 0) || (availableChars < len)) {
        throw new IllegalArgumentException("Illegal length");
    }
    final int byteCount = len * 2;
    try {
        return new String(string, offset, byteCount, "UTF-16BE");
    } catch (UnsupportedEncodingException e) {
        throw new InternalError(); /*unreachable*/
    }
}
/** public static String getFromUnicodeHigh( final byte[] string ) {
* given a byte array of 16-bit unicode characters, compress to 8-bit and return getFromUnicodeHigh( string, 0, string.length / 2 );
* return a string }
*
* { 0x00, 0x16 } -0x16
*
*@param string the byte array to be converted
*@return the converted string
*/
public static String getFromUnicodeBE(final byte[] string) {
    // Each character takes two bytes, so half the array length is the
    // number of characters to decode, starting at the first byte.
    final int charCount = string.length / 2;
    return getFromUnicodeBE(string, 0, charCount);
}
/**
 * Reads compressed (8-bit) unicode by decoding a run of bytes with the
 * ISO-8859-1 charset.
 *
 * @param string the byte array to read from
 * @param offset index of the first byte to decode
 * @param len number of bytes to decode
 * @return the String built from the selected bytes
 */
public static String getFromCompressedUnicode(
        final byte[] string,
        final int offset,
        final int len) {
    final String decoded;
    try {
        decoded = new String(string, offset, len, "ISO-8859-1");
    } catch (UnsupportedEncodingException e) {
        throw new InternalError(); /* unreachable */
    }
    return decoded;
}
/** /**
* write compressed unicode * given a byte array of 16-bit unicode characters, compress to 8-bit and
* * return a string
*@param input the String containing the data to be written *
*@param output the byte array to which the data is to be written * { 0x00, 0x16 } -> 0x16
*@param offset an offset into the byte arrat at which the data is start *
* when written *@param string the byte array to be converted
*/ *@param offset the initial offset into the
public static void putCompressedUnicode( * byte array. it is assumed that string[ offset ] and string[ offset +
final String input, * 1 ] contain the first 16-bit unicode character
final byte[] output, *@param len
final int offset) { *@return the converted string
try { *@exception ArrayIndexOutOfBoundsException if offset is out of bounds for
byte[] bytes = input.getBytes("ISO-8859-1"); * the byte array (i.e., is negative or is greater than or equal to
System.arraycopy(bytes, 0, output, offset, bytes.length); * string.length)
} catch (UnsupportedEncodingException e) { *@exception IllegalArgumentException if len is too large (i.e.,
throw new InternalError(); /*unreachable*/ * there is not enough data in string to create a String of that
} * length)
} *@len the length of the final string
*/
/** public static String getFromUnicode(final byte[] string,
* Write uncompressed unicode final int offset, final int len)
* throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
*@param input the String containing the unicode data to be written if ((offset < 0) || (offset >= string.length)) {
*@param output the byte array to hold the uncompressed unicode throw new ArrayIndexOutOfBoundsException("Illegal offset");
*@param offset the offset to start writing into the byte array }
*/ if ((len < 0) || (((string.length - offset) / 2) < len)) {
public static void putUnicodeLE( throw new IllegalArgumentException("Illegal length");
final String input, }
final byte[] output,
final int offset) {
try {
byte[] bytes = input.getBytes("UTF-16LE");
System.arraycopy(bytes, 0, output, offset, bytes.length);
} catch (UnsupportedEncodingException e) {
throw new InternalError(); /*unreachable*/
}
}
/**
* Write uncompressed unicode char[] chars = new char[ len ];
* for ( int i = 0; i < chars.length; i++ ) {
*@param input the String containing the unicode data to be written chars[i] = (char)( ( string[ offset + ( 2*i ) ] << 8 ) +
*@param output the byte array to hold the uncompressed unicode string[ offset + ( 2*i+1 ) ] );
*@param offset the offset to start writing into the byte array }
*/
public static void putUnicodeBE( return new String( chars );
final String input, }
final byte[] output,
final int offset) {
try {
byte[] bytes = input.getBytes("UTF-16BE");
System.arraycopy(bytes, 0, output, offset, bytes.length);
} catch (UnsupportedEncodingException e) {
throw new InternalError(); /*unreachable*/
}
}
/**
 * Builds a message by replacing each unescaped '%' in the template with
 * the next entry of params, in order.
 *
 * A '%' with no remaining parameter is replaced by "?missing data?". A
 * parameter that is a Number and is followed by at least one more template
 * character is rendered by matchOptionalFormatting, which may consume
 * formatting characters that follow the '%'. Every other parameter is
 * rendered with String.valueOf, so a null entry appears as "null". The
 * two-character sequence backslash-percent produces a literal '%'.
 *
 * @param message the template text
 * @param params the values substituted for the placeholders
 * @return the formatted message
 */
public static String format(String message, Object[] params) {
    int currentParamNumber = 0;
    StringBuffer formattedMessage = new StringBuffer();
    for (int i = 0; i < message.length(); i++) {
        char current = message.charAt(i);
        if (current == '%') {
            if (currentParamNumber >= params.length) {
                formattedMessage.append("?missing data?");
            } else if ((params[currentParamNumber] instanceof Number)
                    && (i + 1 < message.length())) {
                // Skip the formatting characters the helper reports as used.
                i += matchOptionalFormatting(
                        (Number) params[currentParamNumber++],
                        message.substring(i + 1),
                        formattedMessage);
            } else {
                // String.valueOf tolerates a null parameter, where
                // calling toString() on it would throw.
                formattedMessage.append(
                        String.valueOf(params[currentParamNumber++]));
            }
        } else if ((current == '\\')
                && (i + 1 < message.length())
                && (message.charAt(i + 1) == '%')) {
            // Escaped percent sign: emit it and step over both characters.
            formattedMessage.append('%');
            i++;
        } else {
            formattedMessage.append(current);
        }
    }
    return formattedMessage.toString();
}
/** /**
* Description of the Method * given a byte array of 16-bit unicode characters, compress to 8-bit and
* * return a string
*@param number Description of the Parameter *
*@param formatting Description of the Parameter * { 0x00, 0x16 } -> 0x16
*@param outputTo Description of the Parameter *
*@return Description of the Return Value *@param string the byte array to be converted
*/ *@return the converted string
private static int matchOptionalFormatting( */
Number number,
String formatting,
StringBuffer outputTo) {
NumberFormat numberFormat = NumberFormat.getInstance();
if ((0 < formatting.length())
&& Character.isDigit(formatting.charAt(0))) {
numberFormat.setMinimumIntegerDigits(
Integer.parseInt(formatting.charAt(0) + ""));
if ((2 < formatting.length())
&& (formatting.charAt(1) == '.')
&& Character.isDigit(formatting.charAt(2))) {
numberFormat.setMaximumFractionDigits(
Integer.parseInt(formatting.charAt(2) + ""));
numberFormat.format(number, outputTo, new FieldPosition(0));
return 3;
}
numberFormat.format(number, outputTo, new FieldPosition(0));
return 1;
} else if (
(0 < formatting.length()) && (formatting.charAt(0) == '.')) {
if ((1 < formatting.length())
&& Character.isDigit(formatting.charAt(1))) {
numberFormat.setMaximumFractionDigits(
Integer.parseInt(formatting.charAt(1) + ""));
numberFormat.format(number, outputTo, new FieldPosition(0));
return 2;
}
}
numberFormat.format(number, outputTo, new FieldPosition(0));
return 1;
}
/** public static String getFromUnicode(final byte[] string) {
* @return the encoding we want to use (ISO-8859-1) return getFromUnicode(string, 0, string.length / 2);
*/ }
public static String getPreferredEncoding() {
return ENCODING;
} /**
* read compressed unicode(8bit)
*
* @author Toshiaki Kamoshida(kamoshida.toshiaki at future dot co dot jp)
*
* @param string byte array to read
* @param offset offset to read byte array
* @param len length to read byte array
* @return String generated String instance by reading byte array
*/
public static String getFromCompressedUnicode(final byte[] string,
        final int offset, final int len) {
    final String result;
    try {
        // Decode the selected bytes with the 8-bit ISO-8859-1 charset.
        result = new String(string, offset, len, "ISO-8859-1");
    } catch (UnsupportedEncodingException e) {
        throw new InternalError();/* unreachable */
    }
    return result;
}
/**
 * Writes compressed unicode: each character of the input is narrowed to a
 * single byte and stored in consecutive positions of the output array.
 *
 * @param input the String containing the data to be written
 * @param output the byte array to which the data is to be written
 * @param offset index in the byte array at which the written data starts
 */
public static void putCompressedUnicode(final String input,
        final byte[] output,
        final int offset) {
    final char[] chars = input.toCharArray();
    for (int i = 0; i < chars.length; i++) {
        // The cast keeps only the low-order eight bits of the character.
        output[offset + i] = (byte) chars[i];
    }
}
/**
 * Writes uncompressed unicode: every character of the input becomes two
 * bytes in the output, low-order byte first.
 *
 * @param input the String containing the unicode data to be written
 * @param output the byte array to hold the uncompressed unicode
 * @param offset index in the byte array at which writing starts
 */
public static void putUncompressedUnicode(final String input,
        final byte[] output,
        final int offset) {
    int pos = offset;
    for (int i = 0, count = input.length(); i < count; i++) {
        final char ch = input.charAt(i);
        output[pos++] = (byte) (ch & 0xFF);
        output[pos++] = (byte) (ch >>> 8);
    }
}
/**
 * Writes uncompressed unicode: every character of the input becomes two
 * bytes in the output, high-order byte first.
 *
 * @param input the String containing the unicode data to be written
 * @param output the byte array to hold the uncompressed unicode
 * @param offset index in the byte array at which writing starts
 */
public static void putUncompressedUnicodeHigh(final String input,
        final byte[] output,
        final int offset) {
    int strlen = input.length();
    for (int k = 0; k < strlen; k++) {
        char c = input.charAt(k);
        output[offset + (2 * k)] = (byte) (c >> 8);
        // The low byte belongs in the second slot of the pair; writing it
        // to the first slot again would overwrite the high byte.
        output[offset + (2 * k) + 1] = (byte) c;
    }
}
/**
 * Builds a message by replacing each unescaped '%' in the template with
 * the next entry of params, in order.
 *
 * A '%' with no remaining parameter is replaced by "?missing data?". A
 * parameter that is a Number and is followed by at least one more template
 * character is rendered by matchOptionalFormatting, which may consume
 * formatting characters that follow the '%'. Every other parameter is
 * rendered with String.valueOf, so a null entry appears as "null". The
 * two-character sequence backslash-percent produces a literal '%'.
 *
 * @param message the template text
 * @param params the values substituted for the placeholders
 * @return the formatted message
 */
public static String format(String message, Object[] params) {
    int currentParamNumber = 0;
    StringBuffer formattedMessage = new StringBuffer();
    for (int i = 0; i < message.length(); i++) {
        char current = message.charAt(i);
        if (current == '%') {
            if (currentParamNumber >= params.length) {
                formattedMessage.append("?missing data?");
            } else if ((params[currentParamNumber] instanceof Number)
                    && (i + 1 < message.length())) {
                // Skip the formatting characters the helper reports as used.
                i += matchOptionalFormatting(
                        (Number) params[currentParamNumber++],
                        message.substring(i + 1), formattedMessage);
            } else {
                // String.valueOf tolerates a null parameter, where
                // calling toString() on it would throw.
                formattedMessage.append(
                        String.valueOf(params[currentParamNumber++]));
            }
        } else if ((current == '\\') && (i + 1 < message.length())
                && (message.charAt(i + 1) == '%')) {
            // Escaped percent sign: emit it and step over both characters.
            formattedMessage.append('%');
            i++;
        } else {
            formattedMessage.append(current);
        }
    }
    return formattedMessage.toString();
}

/**
 * Formats a number into outputTo, honouring an optional formatting prefix
 * at the start of the text that follows the placeholder.
 *
 * Recognised prefixes: "d" sets the minimum integer digits, "d.d" sets the
 * minimum integer digits and the maximum fraction digits, and ".d" sets
 * the maximum fraction digits, where d is a single digit.
 *
 * @param number the value to format
 * @param formatting the template text immediately after the placeholder
 * @param outputTo the buffer the formatted number is appended to
 * @return the number of leading characters of formatting that were
 *         consumed as formatting (0, 1, 2 or 3); the caller skips them
 */
private static int matchOptionalFormatting(Number number,
        String formatting,
        StringBuffer outputTo) {
    NumberFormat numberFormat = NumberFormat.getInstance();
    // Report only what was really consumed; reporting 1 when nothing
    // matched would make the caller drop an ordinary message character.
    int consumed = 0;
    if ((0 < formatting.length())
            && Character.isDigit(formatting.charAt(0))) {
        numberFormat.setMinimumIntegerDigits(Integer.parseInt(formatting.charAt(0) + ""));
        consumed = 1;
        if ((2 < formatting.length()) && (formatting.charAt(1) == '.')
                && Character.isDigit(formatting.charAt(2))) {
            numberFormat.setMaximumFractionDigits(Integer.parseInt(formatting.charAt(2) + ""));
            consumed = 3;
        }
    } else if ((1 < formatting.length()) && (formatting.charAt(0) == '.')
            && Character.isDigit(formatting.charAt(1))) {
        numberFormat.setMaximumFractionDigits(Integer.parseInt(formatting.charAt(1) + ""));
        consumed = 2;
    }
    numberFormat.format(number, outputTo, new FieldPosition(0));
    return consumed;
}
/**
 * Reports the character encoding this utility class prefers.
 *
 * @return the value of the ENCODING constant (ISO-8859-1)
 */
public static String getPreferredEncoding() {
    final String preferred = ENCODING;
    return preferred;
}
} }