SickRage/lib/hachoir_core/i18n.py

# -*- coding: UTF-8 -*-
"""
Functions to manage internationalisation (i18n):
- initLocale(): setup locales and install Unicode compatible stdout and
  stderr ;
- getTerminalCharset(): guess terminal charset ;
- gettext(text) translate a string to current language. The function always
  returns Unicode string. You can also use the alias: _() ;
- ngettext(singular, plural, count): translate a sentence with singular and
  plural form. The function always returns Unicode string.

WARNING: Loading this module indirectly calls initLocale() which sets
         locale LC_ALL to ''. This is needed to get user preferred locale
         settings.
"""

import lib.hachoir_core.config as config
import lib.hachoir_core
import locale
from os import path
import sys
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE

def _getTerminalCharset():
    """
    Probe the environment for the terminal charset (uncached).

    The probes are tried in order and the first non-empty answer wins:
    1. locale.getpreferredencoding() ;
    2. locale.nl_langinfo(locale.CODESET) ;
    3. sys.stdout.encoding ;
    4. the literal "ASCII".

    @see getTerminalCharset()
    """
    # (1) and (2): locale based probes. A probe which raises locale.Error
    # or AttributeError is skipped and the next one is tried.
    locale_probes = (
        lambda: locale.getpreferredencoding(),
        lambda: locale.nl_langinfo(locale.CODESET),
    )
    for probe in locale_probes:
        try:
            found = probe()
        except (locale.Error, AttributeError):
            continue
        if found:
            return found

    # (3) Encoding advertised by sys.stdout, if any
    stdout_charset = getattr(sys.stdout, "encoding", None)
    if stdout_charset:
        return stdout_charset

    # (4) Nothing worked: fall back to "ASCII"
    return "ASCII"

def getTerminalCharset():
    """
    Return the terminal charset, guessing it on the first call only.

    The guess uses the following tests, in order:
    1. Try locale.getpreferredencoding()
    2. Try locale.nl_langinfo(CODESET)
    3. Try sys.stdout.encoding
    4. Otherwise, returns "ASCII"

    The result is stored in the function attribute getTerminalCharset.value
    and every later call returns that stored value.

    WARNING: Call initLocale() before calling this function.
    """
    if not hasattr(getTerminalCharset, "value"):
        # First call: run the real detection and remember its answer
        getTerminalCharset.value = _getTerminalCharset()
    return getTerminalCharset.value

class UnicodeStdout(object):
    """
    File-like wrapper which encodes Unicode strings before writing them to
    the wrapped device.

    Unicode text is encoded to the given charset with the 'replace' error
    handler; any other object is passed to the device unchanged.

    Attributes which are not defined by the wrapper itself (isatty(),
    fileno(), encoding, closed, ...) are looked up on the wrapped device, so
    code which expects a regular file object in sys.stdout or sys.stderr
    keeps working once initLocale() has installed the wrapper.

    @param old_device: file-like object which receives the encoded output
    @param charset: name of the charset used to encode Unicode strings
    """
    def __init__(self, old_device, charset):
        self.device = old_device
        self.charset = charset

    def __getattr__(self, name):
        # Only called when the normal attribute lookup failed. Never
        # delegate the wrapper's own attributes: on an instance whose
        # __init__() did not run, reading self.device here would recurse
        # forever.
        if name in ("device", "charset"):
            raise AttributeError(name)
        return getattr(self.device, name)

    def flush(self):
        """Flush the wrapped device."""
        self.device.flush()

    def write(self, text):
        """Write text to the device, encoding it first if it is Unicode."""
        if isinstance(text, unicode):
            text = text.encode(self.charset, 'replace')
        self.device.write(text)

    def writelines(self, lines):
        """Write each item of lines using write()."""
        for text in lines:
            self.write(text)

def initLocale():
    """
    Setup the locale and, if enabled, install Unicode aware stdout/stderr.

    The setup is only done on the first call:
    - locale LC_ALL is set to '' (locale.Error and IOError are ignored) ;
    - sys.stdout and sys.stderr are replaced by UnicodeStdout objects when
      config.unicode_stdout is true and the readline module is not loaded.

    Every call returns the terminal charset given by getTerminalCharset().
    """
    first_call = not initLocale.is_done
    initLocale.is_done = True

    if first_call:
        # Setup locales
        try:
            locale.setlocale(locale.LC_ALL, "")
        except (locale.Error, IOError):
            pass

    # Get the terminal charset (after the locale setup on the first call)
    terminal_charset = getTerminalCharset()

    # UnicodeStdout conflicts with the readline module
    wrap_streams = (
        first_call
        and config.unicode_stdout
        and ('readline' not in sys.modules)
    )
    if wrap_streams:
        # Replace stdout and stderr by objects supporting unicode strings
        sys.stdout = UnicodeStdout(sys.stdout, terminal_charset)
        sys.stderr = UnicodeStdout(sys.stderr, terminal_charset)
    return terminal_charset
initLocale.is_done = False

def _dummy_gettext(text):
    """
    Fallback for gettext(): no translation, only convert text to Unicode.
    """
    untranslated = unicode(text)
    return untranslated

def _dummy_ngettext(singular, plural, count):
    """
    Fallback for ngettext(): choose between singular and plural without
    translating, and return the chosen form as Unicode.

    The singular form is used when count is non-zero and its absolute value
    is not greater than 1; the plural form is used otherwise (including
    for zero).
    """
    if not (1 < abs(count)) and count:
        chosen = singular
    else:
        chosen = plural
    return unicode(chosen)

def _initGettext():
    """
    Build the (gettext, ngettext) pair of functions exported by this module.

    initLocale() is called first to get the terminal charset. Then:
    - if config.use_i18n is false, or if the gettext module cannot be
      imported, (_dummy_gettext, _dummy_ngettext) is returned ;
    - otherwise the lib.hachoir_core.PACKAGE domain is bound to the
      "locale" directory located one level above this file, selected as
      the current domain, and wrappers around gettext.gettext() and
      gettext.ngettext() are returned.

    The returned functions always return Unicode strings.
    """
    charset = initLocale()

    # gettext is not needed or not available: use dummy gettext functions
    if not config.use_i18n:
        return (_dummy_gettext, _dummy_ngettext)
    try:
        import gettext
    except ImportError:
        return (_dummy_gettext, _dummy_ngettext)

    # Gettext variables
    package = lib.hachoir_core.PACKAGE
    locale_dir = path.join(path.dirname(__file__), "..", "locale")

    # Initialize gettext module
    gettext.bindtextdomain(package, locale_dir)
    gettext.textdomain(package)
    translate = gettext.gettext
    translate_plural = gettext.ngettext

    # TODO: find a native function returning Unicode instead of decoding
    # the result by hand (NOTE(review): the ugettext()/ungettext() methods
    # of translation objects look like candidates -- confirm).
    def to_unicode(message):
        # gettext returns the message it was given when no translation is
        # found, so a unicode argument comes back as unicode. Decoding a
        # unicode object with unicode(message, charset) raises TypeError:
        # return it unchanged instead.
        if isinstance(message, unicode):
            return message
        return unicode(message, charset)

    def unicode_gettext(text):
        return to_unicode(translate(text))

    def unicode_ngettext(singular, plural, count):
        return to_unicode(translate_plural(singular, plural, count))

    return (unicode_gettext, unicode_ngettext)

# (byte order mark, charset name) pairs. guessBytesCharset() tests them in
# this order and returns the name of the first BOM its input starts with.
UTF_BOMS = (
    (BOM_UTF8, "UTF-8"),
    (BOM_UTF16_LE, "UTF-16-LE"),
    (BOM_UTF16_BE, "UTF-16-BE"),
)

# Set of valid characters for specific charset.
# Each entry is (set of sample characters encoded to the charset, charset
# name). guessBytesCharset() walks the entries in this order and returns the
# first charset whose set contains every non-ASCII byte of its input, so the
# order of the entries matters.
CHARSET_CHARACTERS = (
    # U+00E0: LATIN SMALL LETTER A WITH GRAVE
    (set(u"©®éêè\xE0ç".encode("ISO-8859-1")), "ISO-8859-1"),
    (set(u"©®éêè\xE0ç€".encode("ISO-8859-15")), "ISO-8859-15"),
    (set(u"©®".encode("MacRoman")), "MacRoman"),
    (set(u"εδηιθκμοΡσςυΈί".encode("ISO-8859-7")), "ISO-8859-7"),
)

def guessBytesCharset(bytes, default=None):
    r"""
    Guess the charset of a byte string.

    The tests are done in this order:
    1. a byte order mark listed in UTF_BOMS at the start of the string ;
    2. strict decoding as ASCII ;
    3. strict decoding as UTF-8 ;
    4. first entry of CHARSET_CHARACTERS whose character set contains all
       the bytes >= 128 of the string.
    If no test matches, default is returned.

    >>> guessBytesCharset("abc")
    'ASCII'
    >>> guessBytesCharset("\xEF\xBB\xBFabc")
    'UTF-8'
    >>> guessBytesCharset("abc\xC3\xA9")
    'UTF-8'
    >>> guessBytesCharset("File written by Adobe Photoshop\xA8 4.0\0")
    'MacRoman'
    >>> guessBytesCharset("\xE9l\xE9phant")
    'ISO-8859-1'
    >>> guessBytesCharset("100 \xA4")
    'ISO-8859-15'
    >>> guessBytesCharset('Word \xb8\xea\xe4\xef\xf3\xe7 - Microsoft Outlook 97 - \xd1\xf5\xe8\xec\xdf\xf3\xe5\xe9\xf2 e-mail')
    'ISO-8859-7'
    """
    # (1) A leading byte order mark decides immediately
    for bom, name in UTF_BOMS:
        if bytes.startswith(bom):
            return name

    # (2) and (3): the first charset which decodes the whole string wins
    for name in ('ASCII', 'UTF-8'):
        try:
            unicode(bytes, name, 'strict')
        except UnicodeDecodeError:
            continue
        return name

    # (4) Collect the non-ASCII bytes and look for a charset knowing them all
    high_bytes = set()
    for byte in bytes:
        if ord(byte) >= 128:
            high_bytes.add(byte)
    for known_characters, name in CHARSET_CHARACTERS:
        if known_characters.issuperset(high_bytes):
            return name
    return default

# Initialize _(), gettext() and ngettext() functions.
# This runs when the module is imported: _initGettext() calls initLocale(),
# which is why loading this module changes the locale settings (see the
# warning in the module docstring).
gettext, ngettext = _initGettext()
# _() is the short alias of gettext()
_ = gettext
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search and gather its series data, allowing you to access and download shows that you couldn't before because you were locked into only what TheTVDB had to offer. This edition is also based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`# -- coding: UTF-8 --`
			`"""`
			`Functions to manage internationalisation (i18n):`
			`- initLocale(): setup locales and install Unicode compatible stdout and`
			`stderr ;`
			`- getTerminalCharset(): guess terminal charset ;`
			`- gettext(text) translate a string to current language. The function always`
			`returns Unicode string. You can also use the alias: _() ;`
			`- ngettext(singular, plural, count): translate a sentence with singular and`
			`plural form. The function always returns Unicode string.`

			`WARNING: Loading this module indirectly calls initLocale() which sets`
			`locale LC_ALL to ''. This is needed to get user preferred locale`
			`settings.`
			`"""`

			`import lib.hachoir_core.config as config`
			`import lib.hachoir_core`
			`import locale`
			`from os import path`
			`import sys`
			`from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE`

			`def _getTerminalCharset():`
			`"""`
			`Function used by getTerminalCharset() to get terminal charset.`

			`@see getTerminalCharset()`
			`"""`
			`# (1) Try locale.getpreferredencoding()`
			`try:`
			`charset = locale.getpreferredencoding()`
			`if charset:`
			`return charset`
			`except (locale.Error, AttributeError):`
			`pass`

			`# (2) Try locale.nl_langinfo(CODESET)`
			`try:`
			`charset = locale.nl_langinfo(locale.CODESET)`
			`if charset:`
			`return charset`
			`except (locale.Error, AttributeError):`
			`pass`

			`# (3) Try sys.stdout.encoding`
			`if hasattr(sys.stdout, "encoding") and sys.stdout.encoding:`
			`return sys.stdout.encoding`

			`# (4) Otherwise, returns "ASCII"`
			`return "ASCII"`

			`def getTerminalCharset():`
			`"""`
			`Guess terminal charset using differents tests:`
			`1. Try locale.getpreferredencoding()`
			`2. Try locale.nl_langinfo(CODESET)`
			`3. Try sys.stdout.encoding`
			`4. Otherwise, returns "ASCII"`

			`WARNING: Call initLocale() before calling this function.`
			`"""`
			`try:`
			`return getTerminalCharset.value`
			`except AttributeError:`
			`getTerminalCharset.value = _getTerminalCharset()`
			`return getTerminalCharset.value`

			`class UnicodeStdout(object):`
			`def __init__(self, old_device, charset):`
			`self.device = old_device`
			`self.charset = charset`

			`def flush(self):`
			`self.device.flush()`

			`def write(self, text):`
			`if isinstance(text, unicode):`
			`text = text.encode(self.charset, 'replace')`
			`self.device.write(text)`

			`def writelines(self, lines):`
			`for text in lines:`
			`self.write(text)`

			`def initLocale():`
			`# Only initialize locale once`
			`if initLocale.is_done:`
			`return getTerminalCharset()`
			`initLocale.is_done = True`

			`# Setup locales`
			`try:`
			`locale.setlocale(locale.LC_ALL, "")`
			`except (locale.Error, IOError):`
			`pass`

			`# Get the terminal charset`
			`charset = getTerminalCharset()`

			`# UnicodeStdout conflicts with the readline module`
			`if config.unicode_stdout and ('readline' not in sys.modules):`
			`# Replace stdout and stderr by unicode objet supporting unicode string`
			`sys.stdout = UnicodeStdout(sys.stdout, charset)`
			`sys.stderr = UnicodeStdout(sys.stderr, charset)`
			`return charset`
			`initLocale.is_done = False`

			`def _dummy_gettext(text):`
			`return unicode(text)`

			`def _dummy_ngettext(singular, plural, count):`
			`if 1 < abs(count) or not count:`
			`return unicode(plural)`
			`else:`
			`return unicode(singular)`

			`def _initGettext():`
			`charset = initLocale()`

			`# Try to load gettext module`
			`if config.use_i18n:`
			`try:`
			`import gettext`
			`ok = True`
			`except ImportError:`
			`ok = False`
			`else:`
			`ok = False`

			`# gettext is not available or not needed: use dummy gettext functions`
			`if not ok:`
			`return (_dummy_gettext, _dummy_ngettext)`

			`# Gettext variables`
			`package = lib.hachoir_core.PACKAGE`
			`locale_dir = path.join(path.dirname(__file__), "..", "locale")`

			`# Initialize gettext module`
			`gettext.bindtextdomain(package, locale_dir)`
			`gettext.textdomain(package)`
			`translate = gettext.gettext`
			`ngettext = gettext.ngettext`

			`# TODO: translate_unicode lambda function really sucks!`
			`# => find native function to do that`
			`unicode_gettext = lambda text: \`
			`unicode(translate(text), charset)`
			`unicode_ngettext = lambda singular, plural, count: \`
			`unicode(ngettext(singular, plural, count), charset)`
			`return (unicode_gettext, unicode_ngettext)`

			`UTF_BOMS = (`
			`(BOM_UTF8, "UTF-8"),`
			`(BOM_UTF16_LE, "UTF-16-LE"),`
			`(BOM_UTF16_BE, "UTF-16-BE"),`
			`)`

			`# Set of valid characters for specific charset`
			`CHARSET_CHARACTERS = (`
			`# U+00E0: LATIN SMALL LETTER A WITH GRAVE`
			`(set(u"©®éêè\xE0ç".encode("ISO-8859-1")), "ISO-8859-1"),`
			`(set(u"©®éêè\xE0ç€".encode("ISO-8859-15")), "ISO-8859-15"),`
			`(set(u"©®".encode("MacRoman")), "MacRoman"),`
			`(set(u"εδηιθκμοΡσςυΈί".encode("ISO-8859-7")), "ISO-8859-7"),`
			`)`

			`def guessBytesCharset(bytes, default=None):`
			`r"""`
			`>>> guessBytesCharset("abc")`
			`'ASCII'`
			`>>> guessBytesCharset("\xEF\xBB\xBFabc")`
			`'UTF-8'`
			`>>> guessBytesCharset("abc\xC3\xA9")`
			`'UTF-8'`
			`>>> guessBytesCharset("File written by Adobe Photoshop\xA8 4.0\0")`
			`'MacRoman'`
			`>>> guessBytesCharset("\xE9l\xE9phant")`
			`'ISO-8859-1'`
			`>>> guessBytesCharset("100 \xA4")`
			`'ISO-8859-15'`
			`>>> guessBytesCharset('Word \xb8\xea\xe4\xef\xf3\xe7 - Microsoft Outlook 97 - \xd1\xf5\xe8\xec\xdf\xf3\xe5\xe9\xf2 e-mail')`
			`'ISO-8859-7'`
			`"""`
			`# Check for UTF BOM`
			`for bom_bytes, charset in UTF_BOMS:`
			`if bytes.startswith(bom_bytes):`
			`return charset`

			`# Pure ASCII?`
			`try:`
			`text = unicode(bytes, 'ASCII', 'strict')`
			`return 'ASCII'`
			`except UnicodeDecodeError:`
			`pass`

			`# Valid UTF-8?`
			`try:`
			`text = unicode(bytes, 'UTF-8', 'strict')`
			`return 'UTF-8'`
			`except UnicodeDecodeError:`
			`pass`

			`# Create a set of non-ASCII characters`
			`non_ascii_set = set( byte for byte in bytes if ord(byte) >= 128 )`
			`for characters, charset in CHARSET_CHARACTERS:`
			`if characters.issuperset(non_ascii_set):`
			`return charset`
			`return default`

			`# Initialize _(), gettext() and ngettext() functions`
			`gettext, ngettext = _initGettext()`
			`_ = gettext`