1
0
mirror of https://github.com/moparisthebest/SickRage synced 2024-11-16 06:15:09 -05:00
SickRage/lib/hachoir_core/i18n.py
echel0n 0d9fbc1ad7 Welcome to our SickBeard-TVRage Edition ...
This version of SickBeard uses both TVDB and TVRage to search for and gather its series data, allowing you to access and download shows that you couldn't before because you were locked into only what TheTVDB had to offer.

Also, this edition is based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer.

Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk!

Enjoy!
2014-03-09 22:39:12 -07:00

215 lines
6.1 KiB
Python

# -*- coding: UTF-8 -*-
"""
Functions to manage internationalisation (i18n):
- initLocale(): setup locales and install Unicode compatible stdout and
stderr ;
- getTerminalCharset(): guess terminal charset ;
- gettext(text) translate a string to current language. The function always
returns Unicode string. You can also use the alias: _() ;
- ngettext(singular, plural, count): translate a sentence with singular and
plural form. The function always returns Unicode string.
WARNING: Loading this module indirectly calls initLocale() which sets
locale LC_ALL to ''. This is needed to get user preferred locale
settings.
"""
import lib.hachoir_core.config as config
import lib.hachoir_core
import locale
from os import path
import sys
from codecs import BOM_UTF8, BOM_UTF16_LE, BOM_UTF16_BE
def _getTerminalCharset():
"""
Function used by getTerminalCharset() to get terminal charset.
@see getTerminalCharset()
"""
# (1) Try locale.getpreferredencoding()
try:
charset = locale.getpreferredencoding()
if charset:
return charset
except (locale.Error, AttributeError):
pass
# (2) Try locale.nl_langinfo(CODESET)
try:
charset = locale.nl_langinfo(locale.CODESET)
if charset:
return charset
except (locale.Error, AttributeError):
pass
# (3) Try sys.stdout.encoding
if hasattr(sys.stdout, "encoding") and sys.stdout.encoding:
return sys.stdout.encoding
# (4) Otherwise, returns "ASCII"
return "ASCII"
def getTerminalCharset():
    """
    Return the terminal charset, guessing it on the first call only.

    The guess is made by _getTerminalCharset(), which tries in turn:
      1. locale.getpreferredencoding()
      2. locale.nl_langinfo(CODESET)
      3. sys.stdout.encoding
      4. the constant "ASCII"

    The result is stored on the function itself (attribute `value`) and
    reused by every later call.

    WARNING: Call initLocale() before calling this function.
    """
    if not hasattr(getTerminalCharset, "value"):
        # First call: run the detection once and remember the answer
        getTerminalCharset.value = _getTerminalCharset()
    return getTerminalCharset.value
class UnicodeStdout(object):
    """
    File-like wrapper that lets Unicode strings be written to a byte stream.

    Unicode text passed to write() is encoded with `charset`; characters the
    charset cannot represent are replaced instead of raising an error. Any
    other value is handed to the wrapped device untouched.

    Attributes the wrapper does not define itself (isatty(), fileno(),
    encoding, closed, ...) are looked up on the wrapped device, so code that
    expects a regular file object keeps working once sys.stdout or
    sys.stderr has been replaced by an instance of this class.

    @param old_device: file-like object that receives the output
    @param charset: name of the codec used to encode Unicode text
    """

    def __init__(self, old_device, charset):
        self.device = old_device
        self.charset = charset

    def __getattr__(self, name):
        # Only reached when the normal lookup failed. Never delegate the
        # wrapper's own fields: on an instance whose __init__ did not run
        # (copy, pickle) that would recurse forever through self.device.
        if name in ("device", "charset"):
            raise AttributeError(name)
        return getattr(self.device, name)

    def flush(self):
        """Flush the wrapped device."""
        self.device.flush()

    def write(self, text):
        """Write `text`, encoding it first when it is a Unicode string."""
        if isinstance(text, unicode):
            text = text.encode(self.charset, 'replace')
        self.device.write(text)

    def writelines(self, lines):
        """Write every item of `lines` through write()."""
        for text in lines:
            self.write(text)
def initLocale():
    """
    Set up the process locale and return the terminal charset.

    The first call switches LC_ALL to the user's preferred locale (failures
    are ignored) and, when config.unicode_stdout is enabled and the readline
    module has not been imported, replaces sys.stdout and sys.stderr with
    UnicodeStdout wrappers. Later calls only return the charset.

    @return: charset name reported by getTerminalCharset()
    """
    if not initLocale.is_done:
        # Raise the flag first so the setup below is never attempted twice
        initLocale.is_done = True

        # An empty locale name selects the user's preferred settings
        try:
            locale.setlocale(locale.LC_ALL, "")
        except (locale.Error, IOError):
            pass

        terminal_charset = getTerminalCharset()

        # UnicodeStdout conflicts with the readline module: leave the
        # standard streams alone when readline is already loaded
        if config.unicode_stdout and 'readline' not in sys.modules:
            sys.stdout = UnicodeStdout(sys.stdout, terminal_charset)
            sys.stderr = UnicodeStdout(sys.stderr, terminal_charset)
        return terminal_charset

    return getTerminalCharset()


# No initialisation has been performed yet
initLocale.is_done = False
def _dummy_gettext(text):
return unicode(text)
def _dummy_ngettext(singular, plural, count):
if 1 < abs(count) or not count:
return unicode(plural)
else:
return unicode(singular)
def _initGettext():
    """
    Build the (gettext, ngettext) pair of translation functions.

    initLocale() is always called first. The dummy, non-translating
    functions are returned when config.use_i18n is false or when the gettext
    module cannot be imported. Otherwise the package text domain is bound to
    the "locale" directory one level above this module's directory, and the
    returned functions decode the gettext results to Unicode using the
    terminal charset.

    @return: tuple (gettext function, ngettext function)
    """
    charset = initLocale()

    # Load the gettext module only when i18n is requested
    gettext_module = None
    if config.use_i18n:
        try:
            import gettext as gettext_module
        except ImportError:
            pass

    # gettext is not available or not needed: use dummy gettext functions
    if gettext_module is None:
        return (_dummy_gettext, _dummy_ngettext)

    # Bind the package text domain to its message catalogs
    domain = lib.hachoir_core.PACKAGE
    catalog_dir = path.join(path.dirname(__file__), "..", "locale")
    gettext_module.bindtextdomain(domain, catalog_dir)
    gettext_module.textdomain(domain)

    raw_gettext = gettext_module.gettext
    raw_ngettext = gettext_module.ngettext

    # TODO: these decoding wrappers are a workaround
    #       => find a native function that returns Unicode directly
    def unicode_gettext(text):
        return unicode(raw_gettext(text), charset)

    def unicode_ngettext(singular, plural, count):
        return unicode(raw_ngettext(singular, plural, count), charset)

    return (unicode_gettext, unicode_ngettext)
# Byte order marks checked by guessBytesCharset(), each paired with the
# charset name returned when the data starts with that mark
UTF_BOMS = (
    (BOM_UTF8, "UTF-8"),
    (BOM_UTF16_LE, "UTF-16-LE"),
    (BOM_UTF16_BE, "UTF-16-BE"),
)

# Set of valid characters for specific charset: each entry pairs the set built
# from a sample of non-ASCII characters, encoded in that charset, with the
# charset name. guessBytesCharset() walks the entries in order and returns the
# first charset whose set contains every non-ASCII byte of its input, so
# earlier entries take precedence over later ones.
CHARSET_CHARACTERS = (
    # U+00E0: LATIN SMALL LETTER A WITH GRAVE
    (set(u"©®éêè\xE0ç".encode("ISO-8859-1")), "ISO-8859-1"),
    (set(u"©®éêè\xE0ç€".encode("ISO-8859-15")), "ISO-8859-15"),
    (set(u"©®".encode("MacRoman")), "MacRoman"),
    (set(u"εδηιθκμοΡσςυΈί".encode("ISO-8859-7")), "ISO-8859-7"),
)
def guessBytesCharset(bytes, default=None):
    r"""
    Guess the charset of a byte string.

    The checks are made in this order: a leading UTF byte order mark, pure
    ASCII, valid UTF-8, and finally the CHARSET_CHARACTERS table (first
    charset whose character set covers every non-ASCII byte of the input).

    @param bytes: byte string to inspect
    @param default: value returned when no charset matches
    @return: charset name, or `default`

    >>> guessBytesCharset("abc")
    'ASCII'
    >>> guessBytesCharset("\xEF\xBB\xBFabc")
    'UTF-8'
    >>> guessBytesCharset("abc\xC3\xA9")
    'UTF-8'
    >>> guessBytesCharset("File written by Adobe Photoshop\xA8 4.0\0")
    'MacRoman'
    >>> guessBytesCharset("\xE9l\xE9phant")
    'ISO-8859-1'
    >>> guessBytesCharset("100 \xA4")
    'ISO-8859-15'
    >>> guessBytesCharset('Word \xb8\xea\xe4\xef\xf3\xe7 - Microsoft Outlook 97 - \xd1\xf5\xe8\xec\xdf\xf3\xe5\xe9\xf2 e-mail')
    'ISO-8859-7'
    """
    # A byte order mark settles the question immediately
    for mark, bom_charset in UTF_BOMS:
        if bytes.startswith(mark):
            return bom_charset

    # Strict decoding: pure ASCII first, then valid UTF-8
    for candidate in ('ASCII', 'UTF-8'):
        try:
            unicode(bytes, candidate, 'strict')
        except UnicodeDecodeError:
            continue
        return candidate

    # Compare the non-ASCII bytes with the characters known for each charset
    high_bytes = set(item for item in bytes if 128 <= ord(item))
    for known_characters, table_charset in CHARSET_CHARACTERS:
        if high_bytes.issubset(known_characters):
            return table_charset
    return default
# Initialize _(), gettext() and ngettext() functions.
# NOTE: this runs at import time, and _initGettext() calls initLocale(), so
# merely importing this module sets up the locale.
gettext, ngettext = _initGettext()
# Short alias for gettext()
_ = gettext