1
0
mirror of https://github.com/moparisthebest/SickRage synced 2024-11-17 14:55:07 -05:00
SickRage/lib/tvrage_api/tvrage_api.py
echel0n 1f178686cc Fixed regex pattern for sports events that were preventing searches from working.
Switched out urllib2 HTTP Handler for requests HTTP Handler in main getURL function in helpers module.

Fixed date parsing for sports in NameParser module.

Misc fixes and improvements throughout the code.
2014-03-15 18:01:12 -07:00

653 lines
24 KiB
Python

#!/usr/bin/env python2
#encoding:utf-8
#author:echel0n
#project:tvrage_api
#repository:http://github.com/echel0n/tvrage_api
#license:unlicense (http://unlicense.org/)
"""
Modified from http://github.com/dbr/tvrage_api
Simple-to-use Python interface to The TVRage's API (tvrage.com)
"""
__author__ = "echel0n"
__version__ = "1.0"
import os
import re
import time
import urllib
import getpass
import tempfile
import warnings
import logging
import StringIO
import zipfile
import datetime as dt
try:
import xml.etree.cElementTree as ElementTree
except ImportError:
import xml.etree.ElementTree as ElementTree
from lib.dateutil.parser import parse
from lib import requests
from lib.cachecontrol.wrapper import CacheControl
from lib.cachecontrol.caches.file_cache import FileCache
from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
lastTimeout = None
def log():
return logging.getLogger("tvrage_api")
class ShowContainer(dict):
"""Simple dict that holds a series of Show instances
"""
def __init__(self):
self._stack = []
self._lastgc = time.time()
def __setitem__(self, key, value):
self._stack.append(key)
#keep only the 100th latest results
if time.time() - self._lastgc > 20:
tbd = self._stack[:-100]
i = 0
for o in tbd:
del self[o]
del self._stack[i]
i += 1
_lastgc = time.time()
del tbd
super(ShowContainer, self).__setitem__(key, value)
class Show(dict):
"""Holds a dict of seasons, and show data.
"""
def __init__(self):
dict.__init__(self)
self.data = {}
def __repr__(self):
return "<Show %s (containing %s seasons)>" % (
self.data.get(u'seriesname', 'instance'),
len(self)
)
def __getattr__(self, key):
if key in self:
# Key is an episode, return it
return self[key]
if key in self.data:
# Non-numeric request is for show-data
return self.data[key]
raise AttributeError
def __getitem__(self, key):
if key in self:
# Key is an episode, return it
return dict.__getitem__(self, key)
if key in self.data:
# Non-numeric request is for show-data
return dict.__getitem__(self.data, key)
# Data wasn't found, raise appropriate error
if isinstance(key, int) or key.isdigit():
# Episode number x was not found
raise tvrage_seasonnotfound("Could not find season %s" % (repr(key)))
else:
# If it's not numeric, it must be an attribute name, which
# doesn't exist, so attribute error.
raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))
def airedOn(self, date):
ret = self.search(str(date), 'firstaired')
if len(ret) == 0:
raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
return ret
def search(self, term = None, key = None):
"""
Search all episodes in show. Can search all data, or a specific key (for
example, episodename)
Always returns an array (can be empty). First index contains the first
match, and so on.
Each array index is an Episode() instance, so doing
search_results[0]['episodename'] will retrieve the episode name of the
first match.
Search terms are converted to lower case (unicode) strings.
"""
results = []
for cur_season in self.values():
searchresult = cur_season.search(term = term, key = key)
if len(searchresult) != 0:
results.extend(searchresult)
return results
class Season(dict):
def __init__(self, show = None):
"""The show attribute points to the parent show
"""
self.show = show
def __repr__(self):
return "<Season instance (containing %s episodes)>" % (
len(self.keys())
)
def __getattr__(self, episode_number):
if episode_number in self:
return self[episode_number]
raise AttributeError
def __getitem__(self, episode_number):
if episode_number not in self:
raise tvrage_episodenotfound("Could not find episode %s" % (repr(episode_number)))
else:
return dict.__getitem__(self, episode_number)
def search(self, term = None, key = None):
"""Search all episodes in season, returns a list of matching Episode
instances.
"""
results = []
for ep in self.values():
searchresult = ep.search(term = term, key = key)
if searchresult is not None:
results.append(
searchresult
)
return results
class Episode(dict):
def __init__(self, season = None):
"""The season attribute points to the parent season
"""
self.season = season
def __repr__(self):
seasno = int(self.get(u'seasonnumber', 0))
epno = int(self.get(u'episodenumber', 0))
epname = self.get(u'episodename')
if epname is not None:
return "<Episode %02dx%02d - %s>" % (seasno, epno, epname)
else:
return "<Episode %02dx%02d>" % (seasno, epno)
def __getattr__(self, key):
if key in self:
return self[key]
raise AttributeError
def __getitem__(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))
def search(self, term = None, key = None):
"""Search episode data for term, if it matches, return the Episode (self).
The key parameter can be used to limit the search to a specific element,
for example, episodename.
This primarily for use use by Show.search and Season.search.
"""
if term == None:
raise TypeError("must supply string to search for (contents)")
term = unicode(term).lower()
for cur_key, cur_value in self.items():
cur_key, cur_value = unicode(cur_key).lower(), unicode(cur_value).lower()
if key is not None and cur_key != key:
# Do not search this key
continue
if cur_value.find( unicode(term).lower() ) > -1:
return self
class TVRage:
"""Create easy-to-use interface to name of season/episode name"""
def __init__(self,
interactive = False,
select_first = False,
debug = False,
cache = True,
banners = False,
actors = False,
custom_ui = None,
language = None,
search_all_languages = False,
apikey = None,
forceConnect=False,
useZip=False,
dvdorder=False):
"""
cache (True/False/str/unicode/urllib2 opener):
Retrieved XML are persisted to to disc. If true, stores in
tvrage_api folder under your systems TEMP_DIR, if set to
str/unicode instance it will use this as the cache
location. If False, disables caching. Can also be passed
an arbitrary Python object, which is used as a urllib2
opener, which should be created by urllib2.build_opener
forceConnect (bool):
If true it will always try to connect to tvrage.com even if we
recently timed out. By default it will wait one minute before
trying again, and any requests within that one minute window will
return an exception immediately.
"""
global lastTimeout
# if we're given a lastTimeout that is less than 1 min just give up
if not forceConnect and lastTimeout != None and dt.datetime.now() - lastTimeout < dt.timedelta(minutes=1):
raise tvrage_error("We recently timed out, so giving up early this time")
self.shows = ShowContainer() # Holds all Show classes
self.corrections = {} # Holds show-name to show_id mapping
self.config = {}
if apikey is not None:
self.config['apikey'] = apikey
else:
self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt" # tvdb_api's API key
self.config['debug_enabled'] = debug # show debugging messages
self.config['custom_ui'] = custom_ui
if cache is True:
self.config['cache_enabled'] = True
self.config['cache_location'] = self._getTempDir()
elif cache is False:
self.config['cache_enabled'] = False
elif isinstance(cache, basestring):
self.config['cache_enabled'] = True
self.config['cache_location'] = cache
else:
raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))
if self.config['debug_enabled']:
warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. "
"To enable debug messages, use the following code before importing: "
"import logging; logging.basicConfig(level=logging.DEBUG)")
logging.basicConfig(level=logging.DEBUG)
# List of language from http://tvrage.com/api/0629B785CE550C8D/languages.xml
# Hard-coded here as it is realtively static, and saves another HTTP request, as
# recommended on http://tvrage.com/wiki/index.php/API:languages.xml
self.config['valid_languages'] = [
"da", "fi", "nl", "de", "it", "es", "fr","pl", "hu","el","tr",
"ru","he","ja","pt","zh","cs","sl", "hr","ko","en","sv","no"
]
# tvrage.com should be based around numeric language codes,
# but to link to a series like http://tvrage.com/?tab=series&id=79349&lid=16
# requires the language ID, thus this mapping is required (mainly
# for usage in tvrage_ui - internally tvrage_api will use the language abbreviations)
self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}
if language is None:
self.config['language'] = 'en'
else:
if language not in self.config['valid_languages']:
raise ValueError("Invalid language %s, options are: %s" % (
language, self.config['valid_languages']
))
else:
self.config['language'] = language
# The following url_ configs are based of the
# http://tvrage.com/wiki/index.php/Programmers_API
self.config['base_url'] = "http://services.tvrage.com"
self.config['url_getSeries'] = u"%(base_url)s/myfeeds/search.php" % self.config
self.config['params_getSeries'] = {"key": self.config['apikey'], "show": ""}
self.config['url_epInfo'] = u"%(base_url)s/myfeeds/episode_list.php" % self.config
self.config['params_epInfo'] = {"key": self.config['apikey'], "sid": ""}
self.config['url_seriesInfo'] = u"%(base_url)s/myfeeds/showinfo.php" % self.config
self.config['params_seriesInfo'] = {"key": self.config['apikey'], "sid": ""}
def _getTempDir(self):
"""Returns the [system temp dir]/tvrage_api-u501 (or
tvrage_api-myuser)
"""
if hasattr(os, 'getuid'):
uid = "u%d" % (os.getuid())
else:
# For Windows
try:
uid = getpass.getuser()
except ImportError:
return os.path.join(tempfile.gettempdir(), "tvrage_api")
return os.path.join(tempfile.gettempdir(), "tvrage_api-%s" % (uid))
def _loadUrl(self, url, params=None):
global lastTimeout
try:
log().debug("Retrieving URL %s" % url)
# cacheControl
if self.config['cache_enabled']:
sess = CacheControl(requests.Session(), cache_force=True, cache=FileCache(self.config['cache_location']))
else:
sess = requests.Session()
# get response from TVRage
resp = sess.get(url, params=params)
except Exception, e:
if not str(e).startswith('HTTP Error'):
lastTimeout = dt.datetime.now()
raise tvrage_error("Could not connect to server: %s" % (e))
if 'application/zip' in resp.headers.get("Content-Type", ''):
try:
# TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
log().debug("We recived a zip file unpacking now ...")
zipdata = StringIO.StringIO()
zipdata.write(resp.content)
myzipfile = zipfile.ZipFile(zipdata)
return myzipfile.read('%s.xml' % self.config['language'])
except zipfile.BadZipfile:
raise tvrage_error("Bad zip file received from tvrage.com, could not read it")
return resp.content
def _getetsrc(self, url, params=None):
"""Loads a URL using caching, returns an ElementTree of the source
"""
reDict = {
'showid': 'id',
'showname': 'seriesname',
'name': 'seriesname',
'summary': 'overview',
'started': 'firstaired',
'genres': 'genre',
'airtime': 'airs_time',
'airday': 'airs_dayofweek',
'image': 'fanart',
'epnum': 'id',
'title': 'episodename',
'airdate': 'firstaired',
'screencap': 'filename',
'seasonnum': 'episodenumber',
}
robj = re.compile('|'.join(reDict.keys()))
src = self._loadUrl(url, params)
try:
# TVRAGE doesn't sanitize \r (CR) from user input in some fields,
# remove it to avoid errors. Change from SickBeard, from will14m
xml = ElementTree.fromstring(src.rstrip("\r"))
tree = ElementTree.ElementTree(xml)
for elm in tree.iter():
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
if elm.tag in 'firstaired':
if elm.text is "0000-00-00":
elm.text = str(dt.date.fromordinal(1))
try:
fixDate = parse(elm.text, fuzzy=True)
elm.text = fixDate.strftime("%Y-%m-%d")
except:
pass
return ElementTree.fromstring(ElementTree.tostring(xml))
except SyntaxError:
src = self._loadUrl(url, params)
try:
xml = ElementTree.fromstring(src.rstrip("\r"))
tree = ElementTree.ElementTree(xml)
for elm in tree.iter():
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
if elm.tag in 'firstaired' and elm.text:
if elm.text is "0000-00-00":
elm.text = str(dt.date.fromordinal(1))
try:
#month = strptime(match.group('air_month')[:3],'%b').tm_mon
#day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
#dtStr = '%s/%s/%s' % (year, month, day)
fixDate = parse(elm.text, fuzzy=True)
elm.text = fixDate.strftime("%Y-%m-%d")
except:
pass
return ElementTree.fromstring(ElementTree.tostring(xml))
except SyntaxError, exceptionmsg:
errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % (
exceptionmsg
)
if self.config['cache_enabled']:
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
self.config['cache_location']
)
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
raise tvrage_error(errormsg)
def _setItem(self, sid, seas, ep, attrib, value):
"""Creates a new episode, creating Show(), Season() and
Episode()s as required. Called by _getShowData to populate show
Since the nice-to-use tvrage[1][24]['name] interface
makes it impossible to do tvrage[1][24]['name] = "name"
and still be capable of checking if an episode exists
so we can raise tvrage_shownotfound, we have a slightly
less pretty method of setting items.. but since the API
is supposed to be read-only, this is the best way to
do it!
The problem is that calling tvrage[1][24]['episodename'] = "name"
calls __getitem__ on tvrage[1], there is no way to check if
tvrage.__dict__ should have a key "1" before we auto-create it
"""
if sid not in self.shows:
self.shows[sid] = Show()
if seas not in self.shows[sid]:
self.shows[sid][seas] = Season(show = self.shows[sid])
if ep not in self.shows[sid][seas]:
self.shows[sid][seas][ep] = Episode(season = self.shows[sid][seas])
self.shows[sid][seas][ep][attrib] = value
def _setShowData(self, sid, key, value):
"""Sets self.shows[sid] to a new Show instance, or sets the data
"""
if sid not in self.shows:
self.shows[sid] = Show()
self.shows[sid].data[key] = value
def _cleanData(self, data):
"""Cleans up strings returned by tvrage.com
Issues corrected:
- Replaces &amp; with &
- Trailing whitespace
"""
data = data.replace(u"&amp;", u"&")
data = data.strip()
return data
def search(self, series):
"""This searches tvrage.com for the series name
and returns the result list
"""
series = urllib.quote(series.encode("utf-8"))
log().debug("Searching for show %s" % series)
self.config['params_getSeries']['show'] = series
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
allSeries = []
seriesResult = {}
for series in seriesEt:
for k in series.getchildren():
seriesResult.setdefault(k.tag.lower(), k.text)
seriesResult['id'] = int(seriesResult['id'])
log().debug('Found series %s' % seriesResult['seriesname'])
allSeries.append(seriesResult)
return allSeries
def _getSeries(self, series):
"""This searches tvrage.com for the series name,
If a custom_ui UI is configured, it uses this to select the correct
series. If not, and interactive == True, ConsoleUI is used, if not
BaseUI is used to select the first result.
"""
allSeries = self.search(series)
if len(allSeries) == 0:
log().debug('Series result returned zero')
raise tvrage_shownotfound("Show-name search returned zero results (cannot find show on TVRAGE)")
if self.config['custom_ui'] is not None:
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
ui = self.config['custom_ui'](config = self.config)
else:
log().debug('Auto-selecting first search result using BaseUI')
ui = BaseUI(config = self.config)
return ui.selectSeries(allSeries)
def _getShowData(self, sid):
"""Takes a series ID, gets the epInfo URL and parses the TVRAGE
XML file into the shows dict in layout:
shows[series_id][season_number][episode_number]
"""
# Parse show information
log().debug('Getting all series data for %s' % (sid))
self.config['params_seriesInfo']['sid'] = sid
seriesInfoEt = self._getetsrc(
self.config['url_seriesInfo'],
self.config['params_seriesInfo']
)
for curInfo in seriesInfoEt:
tag = curInfo.tag.lower()
value = curInfo.text
if value is not None:
value = self._cleanData(value)
self._setShowData(sid, tag, value)
try:
# Parse genre data
log().debug('Getting genres of %s' % (sid))
for genre in seriesInfoEt.find('genres'):
tag = genre.tag.lower()
value = genre.text
if value is not None:
value = self._cleanData(value)
self._setShowData(sid, tag, value)
except Exception:
log().debug('No genres for %s' % (sid))
# Parse episode data
log().debug('Getting all episodes of %s' % (sid))
self.config['params_epInfo']['sid'] = sid
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
for cur_list in epsEt.findall("Episodelist"):
for cur_seas in cur_list:
try:
seas_no = int(cur_seas.attrib['no'])
for cur_ep in cur_seas:
ep_no = int(cur_ep.find('episodenumber').text)
self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
for cur_item in cur_ep:
tag = cur_item.tag.lower()
value = cur_item.text
if value is not None:
value = self._cleanData(value)
self._setItem(sid, seas_no, ep_no, tag, value)
except:
continue
def _nameToSid(self, name):
"""Takes show name, returns the correct series ID (if the show has
already been grabbed), or grabs all episodes and returns
the correct SID.
"""
if name in self.corrections:
log().debug('Correcting %s to %s' % (name, self.corrections[name]) )
sid = self.corrections[name]
else:
log().debug('Getting show %s' % (name))
selected_series = self._getSeries( name )
sname, sid = selected_series['seriesname'], selected_series['id']
log().debug('Got %(seriesname)s, id %(id)s' % selected_series)
self.corrections[name] = sid
self._getShowData(selected_series['id'])
return sid
def __getitem__(self, key):
"""Handles tvrage_instance['seriesname'] calls.
The dict index should be the show id
"""
if isinstance(key, (int, long)):
# Item is integer, treat as show id
if key not in self.shows:
self._getShowData(key)
return self.shows[key]
key = key.lower() # make key lower case
sid = self._nameToSid(key)
log().debug('Got series id %s' % (sid))
return self.shows[sid]
def __repr__(self):
return str(self.shows)
def main():
"""Simple example of using tvrage_api - it just
grabs an episode name interactively.
"""
import logging
logging.basicConfig(level=logging.DEBUG)
tvrage_instance = TVRage(cache=False)
print tvrage_instance['Lost']['seriesname']
print tvrage_instance['Lost'][1][4]['episodename']
if __name__ == '__main__':
main()