mirror of
https://github.com/moparisthebest/SickRage
synced 2024-10-31 23:45:02 -04:00
64 lines
1.8 KiB
Python
64 lines
1.8 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
"""Transliterate Unicode text into plain 7-bit ASCII.
|
||
|
|
||
|
Example usage:
|
||
|
>>> from unidecode import unidecode
|
||
|
>>> unidecode(u"\u5317\u4EB0")
|
||
|
'Bei Jing '
|
||
|
|
||
|
The transliteration uses a straightforward map, and doesn't have alternatives
|
||
|
for the same character based on language, position, or anything else.
|
||
|
|
||
|
In Python 3, a standard string object will be returned. If you need bytes, use:
|
||
|
>>> unidecode("Κνωσός").encode("ascii")
|
||
|
b'Knosos'
|
||
|
"""
|
||
|
import warnings
|
||
|
from sys import version_info
|
||
|
|
||
|
# Transliteration tables loaded so far, keyed by section number (codepoint >> 8).
# A value of None records that no table module could be imported for a section.
Cache = {}
|
||
|
|
||
|
def unidecode(string):
    """Return an ASCII-only transliteration of the Unicode text *string*.

    Characters below U+0080 are copied through unchanged.  Any other
    character is split into a section (code point divided by 256) and a
    position within that section; the section's table is imported from the
    matching ``unidecode.xNNN`` module on first use and remembered in the
    module-level ``Cache``.  Characters above U+EFFFF, characters whose
    section has no table module, and positions beyond the end of a table
    are silently dropped from the result.

    On Python 2 a ``RuntimeWarning`` is issued when *string* is not a
    ``unicode`` object.

    >>> unidecode(u"\u5317\u4EB0")
    'Bei Jing '
    """
    running_py2 = version_info[0] < 3
    if running_py2 and not isinstance(string, unicode):
        message = ("Argument %r is not an unicode object. "
                   "Passing an encoded string will likely have "
                   "unexpected results." % (type(string),))
        # stacklevel 2 attributes the warning to the caller.
        warnings.warn(message, RuntimeWarning, 2)

    pieces = []

    for ch in string:
        code = ord(ch)

        if code < 0x80:
            # Basic ASCII passes straight through.
            pieces.append(str(ch))
        elif code <= 0xeffff:
            # Anything higher (Private Use Area and above) is ignored.
            # High part selects the table, low byte indexes into it.
            block, offset = divmod(code, 256)

            try:
                entries = Cache[block]
            except KeyError:
                try:
                    module = __import__('unidecode.x%03x' % (block),
                                        [], [], ['data'])
                except ImportError:
                    # Remember the miss so the import is not retried.
                    Cache[block] = None
                    continue

                entries = module.data
                Cache[block] = entries

            if entries and offset < len(entries):
                pieces.append(entries[offset])

    return ''.join(pieces)
|