mirror of
https://github.com/moparisthebest/SickRage
synced 2024-11-14 13:25:11 -05:00
215 lines
6.1 KiB
Python
215 lines
6.1 KiB
Python
|
# -*- coding: UTF-8 -*-
|
||
|
"""
|
||
|
Functions to manage internationalisation (i18n):
- initLocale(): setup locales and install Unicode compatible stdout and
  stderr ;
- getTerminalCharset(): guess terminal charset ;
- gettext(text): translate a string to current language. The function always
  returns Unicode string. You can also use the alias: _() ;
- ngettext(singular, plural, count): translate a sentence with singular and
  plural form. The function always returns Unicode string.

WARNING: Loading this module indirectly calls initLocale() which sets
locale LC_ALL to ''. This is needed to get user preferred locale
settings.
|
||
|
"""
|
||
|
|
||
|
import lib.hachoir_core.config as config
|
||
|
import lib.hachoir_core
|
||
|
import locale
|
||
|
from os import path
|
||
|
import sys
|
||
|
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE
|
||
|
|
||
|
def _getTerminalCharset():
    """
    Probe the environment for the terminal charset (result is not cached).

    The probes are tried in order and the first non-empty answer wins:
    locale.getpreferredencoding(), locale.nl_langinfo(CODESET),
    sys.stdout.encoding and, as a last resort, the constant "ASCII".

    @see getTerminalCharset()
    """
    # Probes (1) and (2) both come from the locale module. Each call is
    # wrapped in a lambda so that a missing function or constant raises its
    # AttributeError inside the try block below.
    locale_probes = (
        lambda: locale.getpreferredencoding(),
        lambda: locale.nl_langinfo(locale.CODESET),
    )
    for probe in locale_probes:
        try:
            detected = probe()
        except (locale.Error, AttributeError):
            continue
        if detected:
            return detected

    # (3) Encoding advertised by the stdout object, when it has one
    if hasattr(sys.stdout, "encoding"):
        stdout_charset = sys.stdout.encoding
        if stdout_charset:
            return stdout_charset

    # (4) Nothing usable was found
    return "ASCII"
|
||
|
|
||
|
def getTerminalCharset():
    """
    Guess the terminal charset using different tests:
    1. Try locale.getpreferredencoding()
    2. Try locale.nl_langinfo(CODESET)
    3. Try sys.stdout.encoding
    4. Otherwise, returns "ASCII"

    The guess is computed on the first call only and stored on the function
    object (attribute "value"); later calls return the stored value.

    WARNING: Call initLocale() before calling this function.
    """
    if not hasattr(getTerminalCharset, "value"):
        getTerminalCharset.value = _getTerminalCharset()
    return getTerminalCharset.value
|
||
|
|
||
|
class UnicodeStdout(object):
    """
    File-like wrapper that lets a byte stream accept Unicode strings.

    Unicode text is encoded with the configured charset (characters that
    cannot be encoded are replaced) before it reaches the wrapped device;
    any other object is forwarded to the device untouched.

    Attributes that the wrapper does not define itself (isatty(), fileno(),
    closed, ...) are looked up on the wrapped device, so code that probes
    sys.stdout keeps working after the stream has been wrapped.
    """

    def __init__(self, old_device, charset):
        """
        @param old_device: stream to wrap; it must provide write() and flush()
        @param charset: name of the codec used to encode Unicode text
        """
        self.device = old_device
        self.charset = charset

    def __getattr__(self, name):
        """
        Delegate unknown attributes to the wrapped device.

        Only called when the normal lookup fails. The wrapper's own fields
        are never delegated: if "device" is not set yet (for example on an
        instance created without __init__), delegating would recurse forever.
        """
        if name in ("device", "charset"):
            raise AttributeError(name)
        return getattr(self.device, name)

    def flush(self):
        """Flush the wrapped device."""
        self.device.flush()

    def write(self, text):
        """Write text to the device, encoding it first if it is Unicode."""
        if isinstance(text, unicode):
            text = text.encode(self.charset, 'replace')
        self.device.write(text)

    def writelines(self, lines):
        """Write every item of lines using write()."""
        for text in lines:
            self.write(text)
|
||
|
|
||
|
def initLocale():
    """
    Set up the locale and, if enabled, Unicode aware stdout/stderr.

    The work is done on the first call only (tracked by the function
    attribute "is_done"); every call returns the terminal charset as
    reported by getTerminalCharset().
    """
    if not initLocale.is_done:
        # Mark as done first so the setup below never runs twice
        initLocale.is_done = True

        # Use the locale settings preferred by the user; failures are ignored
        try:
            locale.setlocale(locale.LC_ALL, "")
        except (locale.Error, IOError):
            pass

        terminal_charset = getTerminalCharset()

        # UnicodeStdout conflicts with the readline module
        wrap_streams = config.unicode_stdout and ('readline' not in sys.modules)
        if wrap_streams:
            # Replace stdout and stderr by objects accepting Unicode strings
            sys.stdout = UnicodeStdout(sys.stdout, terminal_charset)
            sys.stderr = UnicodeStdout(sys.stderr, terminal_charset)
        return terminal_charset

    # Already initialized: only report the charset
    return getTerminalCharset()


initLocale.is_done = False
|
||
|
|
||
|
def _dummy_gettext(text):
    """
    Replacement for gettext() when no translation is available: return
    the text itself, converted to a Unicode string.
    """
    untranslated = unicode(text)
    return untranslated
|
||
|
|
||
|
def _dummy_ngettext(singular, plural, count):
    """
    Replacement for ngettext() when no translation is available.

    Returns the singular form, as a Unicode string, when count is non-zero
    and its absolute value is not greater than one; the plural form is
    returned in every other case (including a count of zero).
    """
    use_singular = not (1 < abs(count)) and bool(count)
    chosen = singular if use_singular else plural
    return unicode(chosen)
|
||
|
|
||
|
def _initGettext():
    """
    Build the pair of translation functions used by this module.

    initLocale() is called first. When i18n is disabled in the configuration
    or the gettext module cannot be imported, the dummy (untranslated)
    functions are returned. Otherwise the package text domain is bound to
    the "locale" directory located next to the parent directory of this
    file, and wrappers around gettext.gettext() / gettext.ngettext() that
    return Unicode strings are returned.

    @return: tuple (gettext function, ngettext function)
    """
    charset = initLocale()

    # gettext is not needed or not available: use dummy gettext functions
    if not config.use_i18n:
        return (_dummy_gettext, _dummy_ngettext)
    try:
        import gettext
    except ImportError:
        return (_dummy_gettext, _dummy_ngettext)

    # Gettext variables
    package = lib.hachoir_core.PACKAGE
    locale_dir = path.join(path.dirname(__file__), "..", "locale")

    # Initialize gettext module
    gettext.bindtextdomain(package, locale_dir)
    gettext.textdomain(package)
    translate = gettext.gettext
    ngettext = gettext.ngettext

    def to_unicode(message):
        # gettext hands the message back untouched when it has no
        # translation for it, so a Unicode input comes back as Unicode.
        # Decoding a Unicode object raises TypeError ("decoding Unicode is
        # not supported"), hence only byte strings are decoded.
        if isinstance(message, unicode):
            return message
        return unicode(message, charset)

    # TODO: find a native gettext function returning Unicode directly
    # instead of decoding the result with the terminal charset.
    def unicode_gettext(text):
        return to_unicode(translate(text))

    def unicode_ngettext(singular, plural, count):
        return to_unicode(ngettext(singular, plural, count))

    return (unicode_gettext, unicode_ngettext)
|
||
|
|
||
|
# Byte order marks recognised by guessBytesCharset(), each paired with the
# charset name reported when the data starts with that mark. Entries are
# tested in the order listed here.
UTF_BOMS = (
    (BOM_UTF8, "UTF-8"),
    (BOM_UTF16_LE, "UTF-16-LE"),
    (BOM_UTF16_BE, "UTF-16-BE"),
)
|
||
|
|
||
|
# Set of valid characters for specific charset: each entry pairs the set of
# encoded sample characters with the name of the charset used to encode them.
# The entries are built, in order, from (sample text, charset name) pairs.
CHARSET_CHARACTERS = tuple(
    (set(sample.encode(charset_name)), charset_name)
    for sample, charset_name in (
        # U+00E0: LATIN SMALL LETTER A WITH GRAVE
        (u"©®éêè\xE0ç", "ISO-8859-1"),
        (u"©®éêè\xE0ç€", "ISO-8859-15"),
        (u"©®", "MacRoman"),
        (u"εδηιθκμοΡσςυΈί", "ISO-8859-7"),
    )
)
|
||
|
|
||
|
def guessBytesCharset(bytes, default=None):
    r"""
    Guess the charset of a byte string.

    The checks are done in this order: a leading UTF byte order mark, pure
    ASCII, valid UTF-8, then the first entry of CHARSET_CHARACTERS whose
    character set contains every non-ASCII byte of the input. The default
    value is returned when nothing matches.

    >>> guessBytesCharset("abc")
    'ASCII'
    >>> guessBytesCharset("\xEF\xBB\xBFabc")
    'UTF-8'
    >>> guessBytesCharset("abc\xC3\xA9")
    'UTF-8'
    >>> guessBytesCharset("File written by Adobe Photoshop\xA8 4.0\0")
    'MacRoman'
    >>> guessBytesCharset("\xE9l\xE9phant")
    'ISO-8859-1'
    >>> guessBytesCharset("100 \xA4")
    'ISO-8859-15'
    >>> guessBytesCharset('Word \xb8\xea\xe4\xef\xf3\xe7 - Microsoft Outlook 97 - \xd1\xf5\xe8\xec\xdf\xf3\xe5\xe9\xf2 e-mail')
    'ISO-8859-7'
    """
    # A leading byte order mark is decisive
    for bom, bom_charset in UTF_BOMS:
        if bytes.startswith(bom):
            return bom_charset

    # Strict decoding: pure ASCII first, then valid UTF-8
    for candidate in ('ASCII', 'UTF-8'):
        try:
            unicode(bytes, candidate, 'strict')
        except UnicodeDecodeError:
            continue
        return candidate

    # Collect the non-ASCII bytes and look for the first known charset
    # whose sample characters cover all of them
    high_bytes = set()
    for byte in bytes:
        if ord(byte) >= 128:
            high_bytes.add(byte)
    for known_bytes, name in CHARSET_CHARACTERS:
        if high_bytes.issubset(known_bytes):
            return name
    return default
|
||
|
|
||
|
# Create the translation helpers once, at import time: _() and gettext() are
# the same function, ngettext() handles singular/plural forms.
_, ngettext = _initGettext()
gettext = _
|
||
|
|