Support for many, many character encodings added. Thanks to Trejkaz
<trejkaz @at@ trypticon.org> for the patch!

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@400277 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bea0a04d24
commit
b495e0f3c4
@ -26,16 +26,54 @@ package org.apache.poi.hpsf;
|
||||
*/
|
||||
public class Constants
|
||||
{
|
||||
/** <p>Codepage 037, a special case.</p> */
|
||||
public static final int CP_037 = 37;
|
||||
|
||||
/** <p>Codepage for SJIS</p> */
|
||||
public static final int CP_SJIS = 932;
|
||||
|
||||
/** <p>Codepage for GBK, aka MS936.</p> */
|
||||
public static final int CP_GBK = 936;
|
||||
|
||||
/** <p>Codepage for MS949</p> */
|
||||
public static final int CP_MS949 = 949;
|
||||
|
||||
/** <p>Codepage for UTF-16</p> */
|
||||
/** <p>Codepage for UTF-16 (little-endian).</p> */
|
||||
public static final int CP_UTF16 = 1200;
|
||||
|
||||
/** <p>Codepage for UTF-16 big-endian.</p> */
|
||||
public static final int CP_UTF16_BE = 1201;
|
||||
|
||||
/** <p>Codepage for Windows 1250.</p> */
|
||||
public static final int CP_WINDOWS_1250 = 1250;
|
||||
|
||||
/** <p>Codepage for Windows 1251.</p> */
|
||||
public static final int CP_WINDOWS_1251 = 1251;
|
||||
|
||||
/** <p>Codepage for Windows 1252.</p> */
|
||||
public static final int CP_WINDOWS_1252 = 1252;
|
||||
|
||||
/** <p>Codepage for Windows 1253.</p> */
|
||||
public static final int CP_WINDOWS_1253 = 1253;
|
||||
|
||||
/** <p>Codepage for Windows 1254.</p> */
|
||||
public static final int CP_WINDOWS_1254 = 1254;
|
||||
|
||||
/** <p>Codepage for Windows 1255.</p> */
|
||||
public static final int CP_WINDOWS_1255 = 1255;
|
||||
|
||||
/** <p>Codepage for Windows 1256.</p> */
|
||||
public static final int CP_WINDOWS_1256 = 1256;
|
||||
|
||||
/** <p>Codepage for Windows 1257.</p> */
|
||||
public static final int CP_WINDOWS_1257 = 1257;
|
||||
|
||||
/** <p>Codepage for Windows 1258.</p> */
|
||||
public static final int CP_WINDOWS_1258 = 1258;
|
||||
|
||||
/** <p>Codepage for Johab.</p> */
|
||||
public static final int CP_JOHAB = 1361;
|
||||
|
||||
/** <p>Codepage for Macintosh Roman (Java: MacRoman)</p> */
|
||||
public static final int CP_MAC_ROMAN = 10000;
|
||||
|
||||
@ -89,10 +127,69 @@ public class Constants
|
||||
/** <p>Codepage for Macintosh Croatian (Java: MacCroatian)</p> */
|
||||
public static final int CP_MAC_CROATIAN = 10082;
|
||||
|
||||
/** <p>Codepage for US-ASCII.</p> */
|
||||
public static final int CP_US_ACSII = 20127;
|
||||
|
||||
/** <p>Codepage for KOI8-R</p> */
|
||||
public static final int CP_KOI8_R = 20866;
|
||||
|
||||
/** <p>Codepage for ISO-8859-1.</p> */
|
||||
public static final int CP_ISO_8859_1 = 28591;
|
||||
|
||||
/** <p>Codepage for ISO-8859-2.</p> */
|
||||
public static final int CP_ISO_8859_2 = 28592;
|
||||
|
||||
/** <p>Codepage for ISO-8859-3.</p> */
|
||||
public static final int CP_ISO_8859_3 = 28593;
|
||||
|
||||
/** <p>Codepage for ISO-8859-4.</p> */
|
||||
public static final int CP_ISO_8859_4 = 28594;
|
||||
|
||||
/** <p>Codepage for ISO-8859-5.</p> */
|
||||
public static final int CP_ISO_8859_5 = 28595;
|
||||
|
||||
/** <p>Codepage for ISO-8859-6.</p> */
|
||||
public static final int CP_ISO_8859_6 = 28596;
|
||||
|
||||
/** <p>Codepage for ISO-8859-7.</p> */
|
||||
public static final int CP_ISO_8859_7 = 28597;
|
||||
|
||||
/** <p>Codepage for ISO-8859-8.</p> */
|
||||
public static final int CP_ISO_8859_8 = 28598;
|
||||
|
||||
/** <p>Codepage for ISO-8859-9.</p> */
|
||||
public static final int CP_ISO_8859_9 = 28599;
|
||||
|
||||
/** <p>Codepage for ISO-2022-JP</p> */
|
||||
public static final int CP_ISO_2022_JP1 = 50220;
|
||||
|
||||
/** <p>Another codepage for ISO-2022-JP</p> */
|
||||
public static final int CP_ISO_2022_JP2 = 50221;
|
||||
|
||||
/** <p>Yet another codepage for ISO-2022-JP</p> */
|
||||
public static final int CP_ISO_2022_JP3 = 50222;
|
||||
|
||||
/** <p>Codepage for ISO-2022-KR</p> */
|
||||
public static final int CP_ISO_2022_KR = 50225;
|
||||
|
||||
/** <p>Codepage for EUC-JP</p> */
|
||||
public static final int CP_EUC_JP = 51932;
|
||||
|
||||
/** <p>Codepage for EUC-KR</p> */
|
||||
public static final int CP_EUC_KR = 51949;
|
||||
|
||||
/** <p>Codepage for GB2312.</p> */
|
||||
public static final int CP_GB2312 = 52936;
|
||||
|
||||
/** <p>Codepage for GB18030.</p> */
|
||||
public static final int CP_GB18030 = 54936;
|
||||
|
||||
/** <p>Another codepage for US-ASCII. Note: Windows lists codepage 65000 as UTF-7; it is mapped to the Java encoding name "US-ASCII".</p> */
|
||||
public static final int CP_US_ASCII2 = 65000;
|
||||
|
||||
/** <p>Codepage for UTF-8</p> */
|
||||
public static final int CP_UTF8 = 65001;
|
||||
|
||||
/** <p>Codepage for Unicode</p> */
|
||||
public static final int CP_UNICODE = CP_UTF16;
|
||||
|
||||
}
|
||||
|
@ -305,10 +305,36 @@ public class VariantSupport extends Variant
|
||||
{
|
||||
case Constants.CP_UTF16:
|
||||
return "UTF-16";
|
||||
case Constants.CP_UTF16_BE:
|
||||
return "UTF-16BE";
|
||||
case Constants.CP_UTF8:
|
||||
return "UTF-8";
|
||||
case Constants.CP_037:
|
||||
return "cp037";
|
||||
case Constants.CP_GBK:
|
||||
return "GBK";
|
||||
case Constants.CP_MS949:
|
||||
return "ms949";
|
||||
case Constants.CP_WINDOWS_1250:
|
||||
return "windows-1250";
|
||||
case Constants.CP_WINDOWS_1251:
|
||||
return "windows-1251";
|
||||
case Constants.CP_WINDOWS_1252:
|
||||
return "windows-1252";
|
||||
case Constants.CP_WINDOWS_1253:
|
||||
return "windows-1253";
|
||||
case Constants.CP_WINDOWS_1254:
|
||||
return "windows-1254";
|
||||
case Constants.CP_WINDOWS_1255:
|
||||
return "windows-1255";
|
||||
case Constants.CP_WINDOWS_1256:
|
||||
return "windows-1256";
|
||||
case Constants.CP_WINDOWS_1257:
|
||||
return "windows-1257";
|
||||
case Constants.CP_WINDOWS_1258:
|
||||
return "windows-1258";
|
||||
case Constants.CP_JOHAB:
|
||||
return "johab";
|
||||
case Constants.CP_MAC_ROMAN:
|
||||
return "MacRoman";
|
||||
case Constants.CP_MAC_JAPAN:
|
||||
@ -341,6 +367,43 @@ public class VariantSupport extends Variant
|
||||
return "MacTurkish";
|
||||
case Constants.CP_MAC_CROATIAN:
|
||||
return "MacCroatian";
|
||||
case Constants.CP_US_ACSII:
|
||||
case Constants.CP_US_ASCII2:
|
||||
return "US-ASCII";
|
||||
case Constants.CP_KOI8_R:
|
||||
return "KOI8-R";
|
||||
case Constants.CP_ISO_8859_1:
|
||||
return "ISO-8859-1";
|
||||
case Constants.CP_ISO_8859_2:
|
||||
return "ISO-8859-2";
|
||||
case Constants.CP_ISO_8859_3:
|
||||
return "ISO-8859-3";
|
||||
case Constants.CP_ISO_8859_4:
|
||||
return "ISO-8859-4";
|
||||
case Constants.CP_ISO_8859_5:
|
||||
return "ISO-8859-5";
|
||||
case Constants.CP_ISO_8859_6:
|
||||
return "ISO-8859-6";
|
||||
case Constants.CP_ISO_8859_7:
|
||||
return "ISO-8859-7";
|
||||
case Constants.CP_ISO_8859_8:
|
||||
return "ISO-8859-8";
|
||||
case Constants.CP_ISO_8859_9:
|
||||
return "ISO-8859-9";
|
||||
case Constants.CP_ISO_2022_JP1:
|
||||
case Constants.CP_ISO_2022_JP2:
|
||||
case Constants.CP_ISO_2022_JP3:
|
||||
return "ISO-2022-JP";
|
||||
case Constants.CP_ISO_2022_KR:
|
||||
return "ISO-2022-KR";
|
||||
case Constants.CP_EUC_JP:
|
||||
return "EUC-JP";
|
||||
case Constants.CP_EUC_KR:
|
||||
return "EUC-KR";
|
||||
case Constants.CP_GB2312:
|
||||
return "GB2312";
|
||||
case Constants.CP_GB18030:
|
||||
return "GB18030";
|
||||
case Constants.CP_SJIS:
|
||||
return "SJIS";
|
||||
default:
|
||||
|
Loading…
Reference in New Issue
Block a user