HSMF generally wants Java Lang style encoding names, while HPSF has always worked with Java NIO ones, so add a flag to CodePageUtil to let you select which style to return

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1497037 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2013-06-26 18:44:51 +00:00
parent 8aca6fde15
commit e82421ef02

View File

@ -234,20 +234,42 @@ public class CodePageUtil
/** /**
* <p>Turns a codepage number into the equivalent character encoding's * <p>Turns a codepage number into the equivalent character encoding's
* name.</p> * name (in Java NIO canonical naming format).</p>
* *
* @param codepage The codepage number * @param codepage The codepage number
* *
* @return The character encoding's name. If the codepage number is 65001, * @return The character encoding's name. If the codepage number is 65001,
* the encoding name is "UTF-8". All other positive numbers are mapped to * the encoding name is "UTF-8". All other positive numbers are mapped to
* "cp" followed by the number, e.g. if the codepage number is 1252 the * their Java NIO names, normally either "windows-" followed by the number,
* returned character encoding name will be "cp1252". * eg "windows-1251", or "cp" followed by the number, e.g. if the codepage
* number is 1252 the returned character encoding name will be "cp1252".
* *
* @exception UnsupportedEncodingException if the specified codepage is * @exception UnsupportedEncodingException if the specified codepage is
* less than zero. * less than zero.
*/ */
public static String codepageToEncoding(final int codepage) public static String codepageToEncoding(final int codepage)
throws UnsupportedEncodingException throws UnsupportedEncodingException
{
return codepageToEncoding(codepage, false);
}
/**
* <p>Turns a codepage number into the equivalent character encoding's
* name, in either Java NIO or Java Lang canonical naming.</p>
*
* @param codepage The codepage number
* @param javaLangFormat Should Java Lang or Java NIO naming be used?
*
* @return The character encoding's name, in either Java Lang format
* (eg Cp1251, ISO8859_5) or Java NIO format (eg windows-1252, ISO-8859-9)
*
* @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/intl/encoding.doc.html">Java Supported Encodings</a>
*
* @exception UnsupportedEncodingException if the specified codepage is
* less than or equal to zero.
*/
public static String codepageToEncoding(final int codepage, boolean javaLangFormat)
throws UnsupportedEncodingException
{ {
if (codepage <= 0) if (codepage <= 0)
throw new UnsupportedEncodingException("Codepage number may not be " + codepage); throw new UnsupportedEncodingException("Codepage number may not be " + codepage);
@ -266,23 +288,50 @@ public class CodePageUtil
case CP_MS949: case CP_MS949:
return "ms949"; return "ms949";
case CP_WINDOWS_1250: case CP_WINDOWS_1250:
return "windows-1250"; if (javaLangFormat)
return "Cp1250";
else
return "windows-1250";
case CP_WINDOWS_1251: case CP_WINDOWS_1251:
return "windows-1251"; if (javaLangFormat)
return "Cp1251";
else
return "windows-1251";
case CP_WINDOWS_1252: case CP_WINDOWS_1252:
return "windows-1252"; if (javaLangFormat)
return "Cp1252";
else
return "windows-1252";
case CP_WINDOWS_1253: case CP_WINDOWS_1253:
return "windows-1253"; if (javaLangFormat)
return "Cp1253";
else
return "windows-1253";
case CP_WINDOWS_1254: case CP_WINDOWS_1254:
return "windows-1254"; if (javaLangFormat)
return "Cp1254";
else
return "windows-1254";
case CP_WINDOWS_1255: case CP_WINDOWS_1255:
return "windows-1255"; if (javaLangFormat)
return "Cp1255";
else
return "windows-1255";
case CP_WINDOWS_1256: case CP_WINDOWS_1256:
return "windows-1256"; if (javaLangFormat)
return "Cp1256";
else
return "windows-1256";
case CP_WINDOWS_1257: case CP_WINDOWS_1257:
return "windows-1257"; if (javaLangFormat)
return "Cp1257";
else
return "windows-1257";
case CP_WINDOWS_1258: case CP_WINDOWS_1258:
return "windows-1258"; if (javaLangFormat)
return "Cp1258";
else
return "windows-1258";
case CP_JOHAB: case CP_JOHAB:
return "johab"; return "johab";
case CP_MAC_ROMAN: case CP_MAC_ROMAN:
@ -323,23 +372,50 @@ public class CodePageUtil
case CP_KOI8_R: case CP_KOI8_R:
return "KOI8-R"; return "KOI8-R";
case CP_ISO_8859_1: case CP_ISO_8859_1:
return "ISO-8859-1"; if (javaLangFormat)
return "ISO8859_1";
else
return "ISO-8859-1";
case CP_ISO_8859_2: case CP_ISO_8859_2:
return "ISO-8859-2"; if (javaLangFormat)
return "ISO8859_2";
else
return "ISO-8859-2";
case CP_ISO_8859_3: case CP_ISO_8859_3:
return "ISO-8859-3"; if (javaLangFormat)
return "ISO8859_3";
else
return "ISO-8859-3";
case CP_ISO_8859_4: case CP_ISO_8859_4:
return "ISO-8859-4"; if (javaLangFormat)
return "ISO8859_4";
else
return "ISO-8859-4";
case CP_ISO_8859_5: case CP_ISO_8859_5:
return "ISO-8859-5"; if (javaLangFormat)
return "ISO8859_5";
else
return "ISO-8859-5";
case CP_ISO_8859_6: case CP_ISO_8859_6:
return "ISO-8859-6"; if (javaLangFormat)
return "ISO8859_6";
else
return "ISO-8859-6";
case CP_ISO_8859_7: case CP_ISO_8859_7:
return "ISO-8859-7"; if (javaLangFormat)
return "ISO8859_7";
else
return "ISO-8859-7";
case CP_ISO_8859_8: case CP_ISO_8859_8:
return "ISO-8859-8"; if (javaLangFormat)
return "ISO8859_8";
else
return "ISO-8859-8";
case CP_ISO_8859_9: case CP_ISO_8859_9:
return "ISO-8859-9"; if (javaLangFormat)
return "ISO8859_9";
else
return "ISO-8859-9";
case CP_ISO_2022_JP1: case CP_ISO_2022_JP1:
case CP_ISO_2022_JP2: case CP_ISO_2022_JP2:
case CP_ISO_2022_JP3: case CP_ISO_2022_JP3: