2014-03-10 01:18:05 -04:00
|
|
|
# Author: Nic Wolfe <nic@wolfeden.ca>
|
|
|
|
# URL: http://code.google.com/p/sickbeard/
|
|
|
|
#
|
2014-05-23 08:37:22 -04:00
|
|
|
# This file is part of SickRage.
|
2014-03-10 01:18:05 -04:00
|
|
|
#
|
2014-05-23 08:37:22 -04:00
|
|
|
# SickRage is free software: you can redistribute it and/or modify
|
2014-03-10 01:18:05 -04:00
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
2014-05-23 08:37:22 -04:00
|
|
|
# SickRage is distributed in the hope that it will be useful,
|
2014-03-10 01:18:05 -04:00
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
2014-11-24 17:31:50 -05:00
|
|
|
# GNU General Public License for more details.
|
2014-03-10 01:18:05 -04:00
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
2014-05-23 08:37:22 -04:00
|
|
|
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
|
2014-03-10 01:18:05 -04:00
|
|
|
|
|
|
|
import os
|
2014-11-25 20:22:31 -05:00
|
|
|
import traceback
|
2014-12-07 12:16:41 -05:00
|
|
|
import re
|
2014-03-10 01:18:05 -04:00
|
|
|
import sickbeard
|
2014-11-25 20:22:31 -05:00
|
|
|
import six
|
|
|
|
import chardet
|
2014-12-07 12:16:41 -05:00
|
|
|
import unicodedata
|
2014-11-25 20:22:31 -05:00
|
|
|
|
2014-12-07 12:16:41 -05:00
|
|
|
from string import ascii_letters, digits
|
2014-12-05 23:13:50 -05:00
|
|
|
from sickbeard import logger
|
|
|
|
|
2014-12-07 12:16:41 -05:00
|
|
|
def toSafeString(original):
    """Reduce *original* to a conservative, ASCII-only string.

    The value is converted with ``_toUnicode``, NFKD-normalised and encoded
    to ASCII with unencodable characters dropped.  Only ASCII letters,
    digits, spaces and the characters ``-_.()`` are kept; the remaining
    text is then split on whitespace and re-joined with single spaces.
    """
    allowed = "-_.() %s%s" % (ascii_letters, digits)

    decomposed = unicodedata.normalize('NFKD', _toUnicode(original))
    ascii_only = decomposed.encode('ASCII', 'ignore')

    kept = [ch for ch in ascii_only if ch in allowed]
    # split()/join collapses whitespace runs and trims both ends.
    return ' '.join(''.join(kept).split())
|
|
|
|
|
|
|
|
|
|
|
|
def simplifyString(original):
    """Return a lower-cased, accent-free, word-separated form of *original*.

    Steps, in order:

    1. lower-case *original* and remove accents via ``stripAccents``;
    2. split on runs of non-word characters, join the pieces with single
       spaces and pass the result through ``toSafeString``;
    3. lower-case again, split on runs of non-word characters and on each
       underscore, and join the pieces with single spaces.

    The final string is returned through ``_toUnicode``.
    """
    # The patterns are raw strings: ``\W`` is not a valid string-literal
    # escape, so a non-raw literal triggers an invalid-escape warning on
    # newer interpreters.  The compiled patterns are unchanged.
    unaccented = stripAccents(original.lower())
    safe = toSafeString(' '.join(re.split(r'\W+', unaccented)))
    words = re.split(r'\W+|_', safe.lower())
    return _toUnicode(' '.join(words))
|
2014-03-10 01:18:05 -04:00
|
|
|
|
2014-12-05 23:13:50 -05:00
|
|
|
def _toUnicode(x):
    """Best-effort conversion of *x* to a unicode string.

    Conversion is attempted in this order:

    1. *x* is already ``unicode``: returned unchanged.
    2. ``six.text_type(x)``.
    3. If ``chardet`` reports the data as ``utf-8``: ``x.decode('utf-8')``.
    4. If *x* is a ``str``: ``x.decode(sickbeard.SYS_ENCODING)``.

    If none of these succeed, *x* itself is returned, so callers may get
    back a non-unicode object.  This function never raises for ordinary
    errors; only non-``Exception`` signals such as ``KeyboardInterrupt``
    and ``SystemExit`` propagate.
    """
    if isinstance(x, unicode):
        return x

    try:
        return six.text_type(x)
    except Exception:
        # Plain conversion failed (e.g. on Python 2, a byte string holding
        # non-ASCII bytes); fall through to explicit decoding below.
        pass

    try:
        if chardet.detect(x).get('encoding') == 'utf-8':
            return x.decode('utf-8')
        if isinstance(x, str):
            return x.decode(sickbeard.SYS_ENCODING)
    except Exception:
        # Deliberately best-effort: any detection or decoding failure means
        # the caller gets the original value back.
        pass

    return x
|
2014-11-25 20:22:31 -05:00
|
|
|
|
|
|
|
def ss(x):
    """Encode *x* to the system encoding, degrading gracefully on failure.

    *x* is first converted with ``_toUnicode``.  The following conversions
    are then tried in order and the first one that succeeds is returned:

    1. ``sickbeard.SYS_ENCODING`` with ``'xmlcharrefreplace'``
    2. ``sickbeard.SYS_ENCODING`` with the default (strict) error handling
    3. ``sickbeard.SYS_ENCODING`` with ``'replace'``
    4. ``'utf-8'`` with ``'replace'``
    5. ``str(x)``

    If every attempt fails, *x* is returned unchanged.  Ordinary errors are
    never raised to the caller; only non-``Exception`` signals such as
    ``KeyboardInterrupt`` and ``SystemExit`` propagate.
    """
    u_x = _toUnicode(x)

    # Each attempt is a thunk so that everything it touches (including the
    # ``sickbeard.SYS_ENCODING`` lookup) is evaluated inside the ``try``.
    attempts = (
        lambda: u_x.encode(sickbeard.SYS_ENCODING, 'xmlcharrefreplace'),
        lambda: u_x.encode(sickbeard.SYS_ENCODING),
        lambda: u_x.encode(sickbeard.SYS_ENCODING, 'replace'),
        lambda: u_x.encode('utf-8', 'replace'),
        lambda: str(x),
    )

    for attempt in attempts:
        try:
            return attempt()
        except Exception:
            # Best-effort by design: move on to the next, more lenient option.
            continue

    return x
|
2014-03-10 01:18:05 -04:00
|
|
|
|
|
|
|
def fixListEncodings(x):
    """Convert every element of a list or tuple with ``_toUnicode``.

    :param x: any value.
    :return: *x* unchanged when it is not a ``list`` or ``tuple``;
        otherwise a new ``list`` holding ``_toUnicode(element)`` for each
        element, omitting any converted value that is ``None``.
    """
    if not isinstance(x, (list, tuple)):
        return x

    # NOTE(review): the None check is applied to the *converted* values, as
    # in the original code; confirm whether filtering the raw elements was
    # the intent.
    converted = (_toUnicode(item) for item in x)
    return [item for item in converted if item is not None]
|
2014-03-10 01:18:05 -04:00
|
|
|
|
2014-11-24 22:37:43 -05:00
|
|
|
|
2014-05-14 12:18:54 -04:00
|
|
|
def ek(func, *args, **kwargs):
    """Call *func* with encoding-adjusted arguments and a decoded result.

    When ``os.name`` is ``'nt'`` the positional arguments are passed
    through untouched.  Otherwise every positional argument that is a
    ``str`` or ``unicode`` is first run through ``ss``.  Keyword arguments
    are always forwarded unchanged.

    The return value of *func* is post-processed: a ``list`` or ``tuple``
    goes through ``fixListEncodings``, a ``str`` goes through
    ``_toUnicode``, and anything else is returned as-is.
    """
    if os.name != 'nt':
        args = [ss(arg) if isinstance(arg, (str, unicode)) else arg for arg in args]

    outcome = func(*args, **kwargs)

    if isinstance(outcome, (list, tuple)):
        return fixListEncodings(outcome)
    if isinstance(outcome, str):
        return _toUnicode(outcome)
    return outcome
|
2014-12-07 12:16:41 -05:00
|
|
|
|
|
|
|
def stripAccents(s):
    """Return *s* with combining accent marks removed.

    The value is converted with ``_toUnicode`` and NFD-normalised, and
    every character whose Unicode category is ``Mn`` is dropped.
    """
    decomposed = unicodedata.normalize('NFD', _toUnicode(s))
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(base_chars)
|