Fix inconsistent indent/whitespace

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1700647 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2015-09-01 19:17:40 +00:00
parent 3e9159c523
commit 46eccc04cc

View File

@@ -33,160 +33,159 @@ import org.apache.poi.hssf.record.RecordInputStream;
* For such functionality, consider using {@link RecordInputStream} * For such functionality, consider using {@link RecordInputStream}
*/ */
public class StringUtil { public class StringUtil {
protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
protected static final Charset UTF16LE = Charset.forName("UTF-16LE"); protected static final Charset UTF16LE = Charset.forName("UTF-16LE");
public static final Charset UTF8 = Charset.forName("UTF-8"); public static final Charset UTF8 = Charset.forName("UTF-8");
private static Map<Integer,Integer> msCodepointToUnicode; private static Map<Integer,Integer> msCodepointToUnicode;
private StringUtil() { private StringUtil() {
// no instances of this class // no instances of this class
} }
/** /**
* Given a byte array of 16-bit unicode characters in Little Endian * Given a byte array of 16-bit unicode characters in Little Endian
* format (most important byte last), return a Java String representation * format (most important byte last), return a Java String representation
* of it. * of it.
* *
* { 0x16, 0x00 } -0x16 * { 0x16, 0x00 } -0x16
* *
* @param string the byte array to be converted * @param string the byte array to be converted
* @param offset the initial offset into the * @param offset the initial offset into the
* byte array. it is assumed that string[ offset ] and string[ offset + * byte array. it is assumed that string[ offset ] and string[ offset +
* 1 ] contain the first 16-bit unicode character * 1 ] contain the first 16-bit unicode character
* @param len the length of the final string * @param len the length of the final string
* @return the converted string, never <code>null</code>. * @return the converted string, never <code>null</code>.
* @exception ArrayIndexOutOfBoundsException if offset is out of bounds for * @exception ArrayIndexOutOfBoundsException if offset is out of bounds for
* the byte array (i.e., is negative or is greater than or equal to * the byte array (i.e., is negative or is greater than or equal to
* string.length) * string.length)
* @exception IllegalArgumentException if len is too large (i.e., * @exception IllegalArgumentException if len is too large (i.e.,
* there is not enough data in string to create a String of that * there is not enough data in string to create a String of that
* length) * length)
*/ */
public static String getFromUnicodeLE( public static String getFromUnicodeLE(
final byte[] string, final byte[] string,
final int offset, final int offset,
final int len) final int len)
throws ArrayIndexOutOfBoundsException, IllegalArgumentException { throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
if ((offset < 0) || (offset >= string.length)) { if ((offset < 0) || (offset >= string.length)) {
throw new ArrayIndexOutOfBoundsException("Illegal offset " + offset + " (String data is of length " + string.length + ")"); throw new ArrayIndexOutOfBoundsException("Illegal offset " + offset + " (String data is of length " + string.length + ")");
} }
if ((len < 0) || (((string.length - offset) / 2) < len)) { if ((len < 0) || (((string.length - offset) / 2) < len)) {
throw new IllegalArgumentException("Illegal length " + len); throw new IllegalArgumentException("Illegal length " + len);
} }
return new String(string, offset, len * 2, UTF16LE); return new String(string, offset, len * 2, UTF16LE);
} }
/** /**
* Given a byte array of 16-bit unicode characters in little endian * Given a byte array of 16-bit unicode characters in little endian
* format (most important byte last), return a Java String representation * format (most important byte last), return a Java String representation
* of it. * of it.
* *
* { 0x16, 0x00 } -0x16 * { 0x16, 0x00 } -0x16
* *
* @param string the byte array to be converted * @param string the byte array to be converted
* @return the converted string, never <code>null</code> * @return the converted string, never <code>null</code>
*/ */
public static String getFromUnicodeLE(byte[] string) { public static String getFromUnicodeLE(byte[] string) {
if(string.length == 0) { return ""; } if(string.length == 0) { return ""; }
return getFromUnicodeLE(string, 0, string.length / 2); return getFromUnicodeLE(string, 0, string.length / 2);
} }
/** /**
* Convert String to 16-bit unicode characters in little endian format * Convert String to 16-bit unicode characters in little endian format
* *
* @param string the string * @param string the string
* @return the byte array of 16-bit unicode characters * @return the byte array of 16-bit unicode characters
*/ */
public static byte[] getToUnicodeLE(String string) { public static byte[] getToUnicodeLE(String string) {
return string.getBytes(UTF16LE); return string.getBytes(UTF16LE);
} }
/** /**
* Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java * Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java
* String and return. * String and return.
* (In Excel terms, read compressed 8 bit unicode as a string) * (In Excel terms, read compressed 8 bit unicode as a string)
* *
* @param string byte array to read * @param string byte array to read
* @param offset offset to read byte array * @param offset offset to read byte array
* @param len length to read byte array * @param len length to read byte array
* @return String generated String instance by reading byte array * @return String generated String instance by reading byte array
*/ */
public static String getFromCompressedUnicode( public static String getFromCompressedUnicode(
final byte[] string, final byte[] string,
final int offset, final int offset,
final int len) { final int len) {
int len_to_use = Math.min(len, string.length - offset); int len_to_use = Math.min(len, string.length - offset);
return new String(string, offset, len_to_use, ISO_8859_1); return new String(string, offset, len_to_use, ISO_8859_1);
} }
public static String readCompressedUnicode(LittleEndianInput in, int nChars) { public static String readCompressedUnicode(LittleEndianInput in, int nChars) {
byte[] buf = new byte[nChars]; byte[] buf = new byte[nChars];
in.readFully(buf); in.readFully(buf);
return new String(buf, ISO_8859_1); return new String(buf, ISO_8859_1);
} }
/** /**
* InputStream <tt>in</tt> is expected to contain: * InputStream <tt>in</tt> is expected to contain:
* <ol> * <ol>
* <li>ushort nChars</li> * <li>ushort nChars</li>
* <li>byte is16BitFlag</li> * <li>byte is16BitFlag</li>
* <li>byte[]/char[] characterData</li> * <li>byte[]/char[] characterData</li>
* </ol> * </ol>
* For this encoding, the is16BitFlag is always present even if nChars==0. * For this encoding, the is16BitFlag is always present even if nChars==0.
* *
* This structure is also known as a XLUnicodeString. * This structure is also known as a XLUnicodeString.
*/ */
public static String readUnicodeString(LittleEndianInput in) { public static String readUnicodeString(LittleEndianInput in) {
int nChars = in.readUShort(); int nChars = in.readUShort();
byte flag = in.readByte(); byte flag = in.readByte();
if ((flag & 0x01) == 0) { if ((flag & 0x01) == 0) {
return readCompressedUnicode(in, nChars); return readCompressedUnicode(in, nChars);
} }
return readUnicodeLE(in, nChars); return readUnicodeLE(in, nChars);
} }
/** /**
* InputStream <tt>in</tt> is expected to contain: * InputStream <tt>in</tt> is expected to contain:
* <ol> * <ol>
* <li>byte is16BitFlag</li> * <li>byte is16BitFlag</li>
* <li>byte[]/char[] characterData</li> * <li>byte[]/char[] characterData</li>
* </ol> * </ol>
* For this encoding, the is16BitFlag is always present even if nChars==0. * For this encoding, the is16BitFlag is always present even if nChars==0.
* <br/> * <br/>
* This method should be used when the nChars field is <em>not</em> stored * This method should be used when the nChars field is <em>not</em> stored
* as a ushort immediately before the is16BitFlag. Otherwise, {@link * as a ushort immediately before the is16BitFlag. Otherwise, {@link
* #readUnicodeString(LittleEndianInput)} can be used. * #readUnicodeString(LittleEndianInput)} can be used.
*/ */
public static String readUnicodeString(LittleEndianInput in, int nChars) { public static String readUnicodeString(LittleEndianInput in, int nChars) {
byte is16Bit = in.readByte(); byte is16Bit = in.readByte();
if ((is16Bit & 0x01) == 0) { if ((is16Bit & 0x01) == 0) {
return readCompressedUnicode(in, nChars); return readCompressedUnicode(in, nChars);
} }
return readUnicodeLE(in, nChars); return readUnicodeLE(in, nChars);
} }
/** /**
* OutputStream <tt>out</tt> will get: * OutputStream <tt>out</tt> will get:
* <ol> * <ol>
* <li>ushort nChars</li> * <li>ushort nChars</li>
* <li>byte is16BitFlag</li> * <li>byte is16BitFlag</li>
* <li>byte[]/char[] characterData</li> * <li>byte[]/char[] characterData</li>
* </ol> * </ol>
* For this encoding, the is16BitFlag is always present even if nChars==0. * For this encoding, the is16BitFlag is always present even if nChars==0.
*/ */
public static void writeUnicodeString(LittleEndianOutput out, String value) { public static void writeUnicodeString(LittleEndianOutput out, String value) {
int nChars = value.length();
int nChars = value.length(); out.writeShort(nChars);
out.writeShort(nChars); boolean is16Bit = hasMultibyte(value);
boolean is16Bit = hasMultibyte(value); out.writeByte(is16Bit ? 0x01 : 0x00);
out.writeByte(is16Bit ? 0x01 : 0x00); if (is16Bit) {
if (is16Bit) { putUnicodeLE(value, out);
putUnicodeLE(value, out); } else {
} else { putCompressedUnicode(value, out);
putCompressedUnicode(value, out); }
} }
}
/** /**
* OutputStream <tt>out</tt> will get: * OutputStream <tt>out</tt> will get:
* <ol> * <ol>