Fix inconsistent indent/whitespace
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1700647 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3e9159c523
commit
46eccc04cc
@ -33,160 +33,159 @@ import org.apache.poi.hssf.record.RecordInputStream;
|
|||||||
* For such functionality, consider using {@link RecordInputStream}
|
* For such functionality, consider using {@link RecordInputStream}
|
||||||
*/
|
*/
|
||||||
public class StringUtil {
|
public class StringUtil {
|
||||||
protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
|
protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
|
||||||
protected static final Charset UTF16LE = Charset.forName("UTF-16LE");
|
protected static final Charset UTF16LE = Charset.forName("UTF-16LE");
|
||||||
public static final Charset UTF8 = Charset.forName("UTF-8");
|
public static final Charset UTF8 = Charset.forName("UTF-8");
|
||||||
|
|
||||||
private static Map<Integer,Integer> msCodepointToUnicode;
|
private static Map<Integer,Integer> msCodepointToUnicode;
|
||||||
|
|
||||||
private StringUtil() {
|
private StringUtil() {
|
||||||
// no instances of this class
|
// no instances of this class
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a byte array of 16-bit unicode characters in Little Endian
|
* Given a byte array of 16-bit unicode characters in Little Endian
|
||||||
* format (most significant byte last), return a Java String representation
|
* format (most significant byte last), return a Java String representation
|
||||||
* of it.
|
* of it.
|
||||||
*
|
*
|
||||||
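* Example: the bytes { 0x16, 0x00 } decode to the single character 0x0016.
|
* Example: the bytes { 0x16, 0x00 } decode to the single character 0x0016.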
|
||||||
*
|
*
|
||||||
* @param string the byte array to be converted
|
* @param string the byte array to be converted
|
||||||
* @param offset the initial offset into the
|
* @param offset the initial offset into the
|
||||||
* byte array. It is assumed that string[ offset ] and string[ offset +
|
* byte array. It is assumed that string[ offset ] and string[ offset +
|
||||||
* 1 ] contain the first 16-bit unicode character
|
* 1 ] contain the first 16-bit unicode character
|
||||||
* @param len the length of the final string
|
* @param len the length of the final string
|
||||||
* @return the converted string, never <code>null</code>.
|
* @return the converted string, never <code>null</code>.
|
||||||
* @exception ArrayIndexOutOfBoundsException if offset is out of bounds for
|
* @exception ArrayIndexOutOfBoundsException if offset is out of bounds for
|
||||||
* the byte array (i.e., is negative or is greater than or equal to
|
* the byte array (i.e., is negative or is greater than or equal to
|
||||||
* string.length)
|
* string.length)
|
||||||
* @exception IllegalArgumentException if len is too large (i.e.,
|
* @exception IllegalArgumentException if len is too large (i.e.,
|
||||||
* there is not enough data in string to create a String of that
|
* there is not enough data in string to create a String of that
|
||||||
* length)
|
* length)
|
||||||
*/
|
*/
|
||||||
public static String getFromUnicodeLE(
|
public static String getFromUnicodeLE(
|
||||||
final byte[] string,
|
final byte[] string,
|
||||||
final int offset,
|
final int offset,
|
||||||
final int len)
|
final int len)
|
||||||
throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
|
throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
|
||||||
if ((offset < 0) || (offset >= string.length)) {
|
if ((offset < 0) || (offset >= string.length)) {
|
||||||
throw new ArrayIndexOutOfBoundsException("Illegal offset " + offset + " (String data is of length " + string.length + ")");
|
throw new ArrayIndexOutOfBoundsException("Illegal offset " + offset + " (String data is of length " + string.length + ")");
|
||||||
}
|
}
|
||||||
if ((len < 0) || (((string.length - offset) / 2) < len)) {
|
if ((len < 0) || (((string.length - offset) / 2) < len)) {
|
||||||
throw new IllegalArgumentException("Illegal length " + len);
|
throw new IllegalArgumentException("Illegal length " + len);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new String(string, offset, len * 2, UTF16LE);
|
return new String(string, offset, len * 2, UTF16LE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a byte array of 16-bit unicode characters in little endian
|
* Given a byte array of 16-bit unicode characters in little endian
|
||||||
* format (most significant byte last), return a Java String representation
|
* format (most significant byte last), return a Java String representation
|
||||||
* of it.
|
* of it.
|
||||||
*
|
*
|
||||||
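* Example: the bytes { 0x16, 0x00 } decode to the single character 0x0016.
|
* Example: the bytes { 0x16, 0x00 } decode to the single character 0x0016.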
|
||||||
*
|
*
|
||||||
* @param string the byte array to be converted
|
* @param string the byte array to be converted
|
||||||
* @return the converted string, never <code>null</code>
|
* @return the converted string, never <code>null</code>
|
||||||
*/
|
*/
|
||||||
public static String getFromUnicodeLE(byte[] string) {
|
public static String getFromUnicodeLE(byte[] string) {
|
||||||
if(string.length == 0) { return ""; }
|
if(string.length == 0) { return ""; }
|
||||||
return getFromUnicodeLE(string, 0, string.length / 2);
|
return getFromUnicodeLE(string, 0, string.length / 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert String to 16-bit unicode characters in little endian format
|
|
||||||
*
|
|
||||||
* @param string the string
|
|
||||||
* @return the byte array of 16-bit unicode characters
|
|
||||||
*/
|
|
||||||
public static byte[] getToUnicodeLE(String string) {
|
|
||||||
return string.getBytes(UTF16LE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java
|
* Convert String to 16-bit unicode characters in little endian format
|
||||||
* String and return.
|
*
|
||||||
* (In Excel terms, read compressed 8 bit unicode as a string)
|
* @param string the string
|
||||||
*
|
* @return the byte array of 16-bit unicode characters
|
||||||
* @param string byte array to read
|
*/
|
||||||
* @param offset offset to read byte array
|
public static byte[] getToUnicodeLE(String string) {
|
||||||
* @param len length to read byte array
|
return string.getBytes(UTF16LE);
|
||||||
* @return the String instance generated by reading the byte array
|
}
|
||||||
*/
|
|
||||||
public static String getFromCompressedUnicode(
|
|
||||||
final byte[] string,
|
|
||||||
final int offset,
|
|
||||||
final int len) {
|
|
||||||
int len_to_use = Math.min(len, string.length - offset);
|
|
||||||
return new String(string, offset, len_to_use, ISO_8859_1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String readCompressedUnicode(LittleEndianInput in, int nChars) {
|
|
||||||
byte[] buf = new byte[nChars];
|
|
||||||
in.readFully(buf);
|
|
||||||
return new String(buf, ISO_8859_1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* InputStream <tt>in</tt> is expected to contain:
|
|
||||||
* <ol>
|
|
||||||
* <li>ushort nChars</li>
|
|
||||||
* <li>byte is16BitFlag</li>
|
|
||||||
* <li>byte[]/char[] characterData</li>
|
|
||||||
* </ol>
|
|
||||||
* For this encoding, the is16BitFlag is always present even if nChars==0.
|
|
||||||
*
|
|
||||||
* This structure is also known as a XLUnicodeString.
|
|
||||||
*/
|
|
||||||
public static String readUnicodeString(LittleEndianInput in) {
|
|
||||||
|
|
||||||
int nChars = in.readUShort();
|
/**
|
||||||
byte flag = in.readByte();
|
* Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java
|
||||||
if ((flag & 0x01) == 0) {
|
* String and return.
|
||||||
return readCompressedUnicode(in, nChars);
|
* (In Excel terms, read compressed 8 bit unicode as a string)
|
||||||
}
|
*
|
||||||
return readUnicodeLE(in, nChars);
|
* @param string byte array to read
|
||||||
}
|
* @param offset offset to read byte array
|
||||||
/**
|
* @param len length to read byte array
|
||||||
* InputStream <tt>in</tt> is expected to contain:
|
* @return the String instance generated by reading the byte array
|
||||||
* <ol>
|
*/
|
||||||
* <li>byte is16BitFlag</li>
|
public static String getFromCompressedUnicode(
|
||||||
* <li>byte[]/char[] characterData</li>
|
final byte[] string,
|
||||||
* </ol>
|
final int offset,
|
||||||
* For this encoding, the is16BitFlag is always present even if nChars==0.
|
final int len) {
|
||||||
* <br/>
|
int len_to_use = Math.min(len, string.length - offset);
|
||||||
* This method should be used when the nChars field is <em>not</em> stored
|
return new String(string, offset, len_to_use, ISO_8859_1);
|
||||||
* as a ushort immediately before the is16BitFlag. Otherwise, {@link
|
}
|
||||||
* #readUnicodeString(LittleEndianInput)} can be used.
|
|
||||||
*/
|
|
||||||
public static String readUnicodeString(LittleEndianInput in, int nChars) {
|
|
||||||
byte is16Bit = in.readByte();
|
|
||||||
if ((is16Bit & 0x01) == 0) {
|
|
||||||
return readCompressedUnicode(in, nChars);
|
|
||||||
}
|
|
||||||
return readUnicodeLE(in, nChars);
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* OutputStream <tt>out</tt> will get:
|
|
||||||
* <ol>
|
|
||||||
* <li>ushort nChars</li>
|
|
||||||
* <li>byte is16BitFlag</li>
|
|
||||||
* <li>byte[]/char[] characterData</li>
|
|
||||||
* </ol>
|
|
||||||
* For this encoding, the is16BitFlag is always present even if nChars==0.
|
|
||||||
*/
|
|
||||||
public static void writeUnicodeString(LittleEndianOutput out, String value) {
|
|
||||||
|
|
||||||
int nChars = value.length();
|
public static String readCompressedUnicode(LittleEndianInput in, int nChars) {
|
||||||
out.writeShort(nChars);
|
byte[] buf = new byte[nChars];
|
||||||
boolean is16Bit = hasMultibyte(value);
|
in.readFully(buf);
|
||||||
out.writeByte(is16Bit ? 0x01 : 0x00);
|
return new String(buf, ISO_8859_1);
|
||||||
if (is16Bit) {
|
}
|
||||||
putUnicodeLE(value, out);
|
|
||||||
} else {
|
/**
|
||||||
putCompressedUnicode(value, out);
|
* InputStream <tt>in</tt> is expected to contain:
|
||||||
}
|
* <ol>
|
||||||
}
|
* <li>ushort nChars</li>
|
||||||
|
* <li>byte is16BitFlag</li>
|
||||||
|
* <li>byte[]/char[] characterData</li>
|
||||||
|
* </ol>
|
||||||
|
* For this encoding, the is16BitFlag is always present even if nChars==0.
|
||||||
|
*
|
||||||
|
* This structure is also known as a XLUnicodeString.
|
||||||
|
*/
|
||||||
|
public static String readUnicodeString(LittleEndianInput in) {
|
||||||
|
|
||||||
|
int nChars = in.readUShort();
|
||||||
|
byte flag = in.readByte();
|
||||||
|
if ((flag & 0x01) == 0) {
|
||||||
|
return readCompressedUnicode(in, nChars);
|
||||||
|
}
|
||||||
|
return readUnicodeLE(in, nChars);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* InputStream <tt>in</tt> is expected to contain:
|
||||||
|
* <ol>
|
||||||
|
* <li>byte is16BitFlag</li>
|
||||||
|
* <li>byte[]/char[] characterData</li>
|
||||||
|
* </ol>
|
||||||
|
* For this encoding, the is16BitFlag is always present even if nChars==0.
|
||||||
|
* <br/>
|
||||||
|
* This method should be used when the nChars field is <em>not</em> stored
|
||||||
|
* as a ushort immediately before the is16BitFlag. Otherwise, {@link
|
||||||
|
* #readUnicodeString(LittleEndianInput)} can be used.
|
||||||
|
*/
|
||||||
|
public static String readUnicodeString(LittleEndianInput in, int nChars) {
|
||||||
|
byte is16Bit = in.readByte();
|
||||||
|
if ((is16Bit & 0x01) == 0) {
|
||||||
|
return readCompressedUnicode(in, nChars);
|
||||||
|
}
|
||||||
|
return readUnicodeLE(in, nChars);
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* OutputStream <tt>out</tt> will get:
|
||||||
|
* <ol>
|
||||||
|
* <li>ushort nChars</li>
|
||||||
|
* <li>byte is16BitFlag</li>
|
||||||
|
* <li>byte[]/char[] characterData</li>
|
||||||
|
* </ol>
|
||||||
|
* For this encoding, the is16BitFlag is always present even if nChars==0.
|
||||||
|
*/
|
||||||
|
public static void writeUnicodeString(LittleEndianOutput out, String value) {
|
||||||
|
int nChars = value.length();
|
||||||
|
out.writeShort(nChars);
|
||||||
|
boolean is16Bit = hasMultibyte(value);
|
||||||
|
out.writeByte(is16Bit ? 0x01 : 0x00);
|
||||||
|
if (is16Bit) {
|
||||||
|
putUnicodeLE(value, out);
|
||||||
|
} else {
|
||||||
|
putCompressedUnicode(value, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* OutputStream <tt>out</tt> will get:
|
* OutputStream <tt>out</tt> will get:
|
||||||
* <ol>
|
* <ol>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user