SickRage/lib/hachoir_core/i18n.py

# -*- coding: UTF-8 -*-
"""
Functions to manage internationalisation (i18n):
- initLocale(): setup locales and install Unicode compatible stdout and
  stderr ;
- getTerminalCharset(): guess terminal charset ;
- gettext(text) translate a string to current language. The function always
  returns Unicode string. You can also use the alias: _() ;
- ngettext(singular, plural, count): translate a sentence with singular and
  plural form. The function always returns Unicode string.

WARNING: Loading this module indirectly calls initLocale() which sets
         locale LC_ALL to ''. This is needed to get user preferred locale
         settings.
"""

import lib.hachoir_core.config as config
import lib.hachoir_core
import locale
from os import path
import sys
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE

def _getTerminalCharset():
    """
    Function used by getTerminalCharset() to get terminal charset.

    @see getTerminalCharset()
    """
    # (1) Try locale.getpreferredencoding()
    try:
        charset = locale.getpreferredencoding()
        if charset:
            return charset
    except (locale.Error, AttributeError):
        pass

    # (2) Try locale.nl_langinfo(CODESET)
    try:
        charset = locale.nl_langinfo(locale.CODESET)
        if charset:
            return charset
    except (locale.Error, AttributeError):
        pass

    # (3) Try sys.stdout.encoding
    if hasattr(sys.stdout, "encoding") and sys.stdout.encoding:
        return sys.stdout.encoding

    # (4) Otherwise, returns "ASCII"
    return "ASCII"

def getTerminalCharset():
    """
    Guess terminal charset using differents tests:
    1. Try locale.getpreferredencoding()
    2. Try locale.nl_langinfo(CODESET)
    3. Try sys.stdout.encoding
    4. Otherwise, returns "ASCII"

    WARNING: Call initLocale() before calling this function.
    """
    try:
        return getTerminalCharset.value
    except AttributeError:
        getTerminalCharset.value = _getTerminalCharset()
        return getTerminalCharset.value

class UnicodeStdout(object):
    def __init__(self, old_device, charset):
        self.device = old_device
        self.charset = charset

    def flush(self):
        self.device.flush()

    def write(self, text):
        if isinstance(text, unicode):
            text = text.encode(self.charset, 'replace')
        self.device.write(text)

    def writelines(self, lines):
        for text in lines:
            self.write(text)

def initLocale():
    # Only initialize locale once
    if initLocale.is_done:
        return getTerminalCharset()
    initLocale.is_done = True

    # Setup locales
    try:
        locale.setlocale(locale.LC_ALL, "")
    except (locale.Error, IOError):
        pass

    # Get the terminal charset
    charset = getTerminalCharset()

    # UnicodeStdout conflicts with the readline module
    if config.unicode_stdout and ('readline' not in sys.modules):
        # Replace stdout and stderr by unicode objet supporting unicode string
        sys.stdout = UnicodeStdout(sys.stdout, charset)
        sys.stderr = UnicodeStdout(sys.stderr, charset)
    return charset
initLocale.is_done = False

def _dummy_gettext(text):
    return unicode(text)

def _dummy_ngettext(singular, plural, count):
    if 1 < abs(count) or not count:
        return unicode(plural)
    else:
        return unicode(singular)

def _initGettext():
    charset = initLocale()

    # Try to load gettext module
    if config.use_i18n:
        try:
            import gettext
            ok = True
        except ImportError:
            ok = False
    else:
        ok = False

    # gettext is not available or not needed: use dummy gettext functions
    if not ok:
        return (_dummy_gettext, _dummy_ngettext)

    # Gettext variables
    package = lib.hachoir_core.PACKAGE
    locale_dir = path.join(path.dirname(__file__), "..", "locale")

    # Initialize gettext module
    gettext.bindtextdomain(package, locale_dir)
    gettext.textdomain(package)
    translate = gettext.gettext
    ngettext = gettext.ngettext

    # TODO: translate_unicode lambda function really sucks!
    # => find native function to do that
    unicode_gettext = lambda text: \
        unicode(translate(text), charset)
    unicode_ngettext = lambda singular, plural, count: \
        unicode(ngettext(singular, plural, count), charset)
    return (unicode_gettext, unicode_ngettext)

UTF_BOMS = (
    (BOM_UTF8, "UTF-8"),
    (BOM_UTF16_LE, "UTF-16-LE"),
    (BOM_UTF16_BE, "UTF-16-BE"),
)

# Set of valid characters for specific charset
CHARSET_CHARACTERS = (
    # U+00E0: LATIN SMALL LETTER A WITH GRAVE
    (set(u"©®éêè\xE0ç".encode("ISO-8859-1")), "ISO-8859-1"),
    (set(u"©®éêè\xE0ç€".encode("ISO-8859-15")), "ISO-8859-15"),
    (set(u"©®".encode("MacRoman")), "MacRoman"),
    (set(u"εδηιθκμοΡσςυΈί".encode("ISO-8859-7")), "ISO-8859-7"),
)

def guessBytesCharset(bytes, default=None):
    r"""
    >>> guessBytesCharset("abc")
    'ASCII'
    >>> guessBytesCharset("\xEF\xBB\xBFabc")
    'UTF-8'
    >>> guessBytesCharset("abc\xC3\xA9")
    'UTF-8'
    >>> guessBytesCharset("File written by Adobe Photoshop\xA8 4.0\0")
    'MacRoman'
    >>> guessBytesCharset("\xE9l\xE9phant")
    'ISO-8859-1'
    >>> guessBytesCharset("100 \xA4")
    'ISO-8859-15'
    >>> guessBytesCharset('Word \xb8\xea\xe4\xef\xf3\xe7 - Microsoft Outlook 97 - \xd1\xf5\xe8\xec\xdf\xf3\xe5\xe9\xf2 e-mail')
    'ISO-8859-7'
    """
    # Check for UTF BOM
    for bom_bytes, charset in UTF_BOMS:
        if bytes.startswith(bom_bytes):
            return charset

    # Pure ASCII?
    try:
        text = unicode(bytes, 'ASCII', 'strict')
        return 'ASCII'
    except UnicodeDecodeError:
        pass

    # Valid UTF-8?
    try:
        text = unicode(bytes, 'UTF-8', 'strict')
        return 'UTF-8'
    except UnicodeDecodeError:
        pass

    # Create a set of non-ASCII characters
    non_ascii_set = set( byte for byte in bytes if ord(byte) >= 128 )
    for characters, charset in CHARSET_CHARACTERS:
        if characters.issuperset(non_ascii_set):
            return charset
    return default

# Initialize _(), gettext() and ngettext() functions
gettext, ngettext = _initGettext()
_ = gettext