Mirror of https://github.com/moparisthebest/SickRage, synced 2024-10-31 15:35:01 -04:00
Update imdbpy libs to v5.0
Fixed invalid indexer ID issues for TVRage shows. Fixed issues with getting posters and backdrops for TVRage shows. We now convert XML straight to a dict object for the indexer APIs, which improves overall API performance. Fixed issues with TVRage shows not displaying genres properly.
This commit is contained in:
parent 764cf6e62e
commit 2dcd26e69c
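The XML-to-dict conversion mentioned in the commit message is done with xmltodict in the Tvdb loader (see the class Tvdb / _loadUrl hunks near the end of this diff). A minimal sketch of the idea, assuming a postprocessor of the same shape as the one added there; the sample XML, series name and ID are illustrative only:

import xmltodict

def process(path, key, value):
    # lower-case every tag name and coerce purely numeric text to int,
    # mirroring the postprocessor added in the Tvdb._loadUrl hunk below
    key = key.lower()
    try:
        if value and value.isdigit():
            value = int(value)
    except AttributeError:
        # nested elements arrive as dicts and have no .isdigit(); keep them as-is
        pass
    return key, value

xml = '<Data><Series><id>80348</id><SeriesName>Chuck</SeriesName></Series></Data>'
data = xmltodict.parse(xml, postprocessor=process)
# data['data']['series'] is now {'id': 80348, 'seriesname': 'Chuck'}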
@@ -6,7 +6,7 @@ a person from the IMDb database.
It can fetch data through different media (e.g.: the IMDb web pages,
a SQL database, etc.)
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
Copyright 2004-2014 Davide Alberani <da@erlug.linux.it>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
'available_access_systems']
__version__ = VERSION = '4.9'
__version__ = VERSION = '5.0'
# Import compatibility module (importing it is enough).
import _compat
@@ -160,6 +160,7 @@ def IMDb(accessSystem=None, *arguments, **keywords):
kwds.update(keywords)
keywords = kwds
except Exception, e:
import logging
logging.getLogger('imdbpy').warn('Unable to read configuration' \
' file; complete error: %s' % e)
# It just LOOKS LIKE a bad habit: we tried to read config
@@ -303,7 +304,7 @@ class IMDbBase:
# http://akas.imdb.com/keyword/%s/
imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
# http://akas.imdb.com/chart/top
imdbURL_top250=imdbURL_base + 'chart/top',
imdbURL_top250=imdbURL_base + 'chart/top'
# http://akas.imdb.com/chart/bottom
imdbURL_bottom100=imdbURL_base + 'chart/bottom'
# http://akas.imdb.com/find?%s
@@ -824,22 +825,23 @@ class IMDbBase:
# subclass, somewhere under the imdb.parser package.
raise NotImplementedError('override this method')
def _searchIMDb(self, kind, ton):
def _searchIMDb(self, kind, ton, title_kind=None):
"""Search the IMDb akas server for the given title or name."""
# The Exact Primary search system has gone AWOL, so we resort
# to the mobile search. :-/
if not ton:
return None
ton = ton.strip('"')
aSystem = IMDb('mobile')
if kind == 'tt':
searchFunct = aSystem.search_movie
check = 'long imdb canonical title'
check = 'long imdb title'
elif kind == 'nm':
searchFunct = aSystem.search_person
check = 'long imdb canonical name'
check = 'long imdb name'
elif kind == 'char':
searchFunct = aSystem.search_character
check = 'long imdb canonical name'
check = 'long imdb name'
elif kind == 'co':
# XXX: are [COUNTRY] codes included in the results?
searchFunct = aSystem.search_company
@@ -852,24 +854,42 @@ class IMDbBase:
# exact match.
if len(searchRes) == 1:
return searchRes[0].getID()
title_only_matches = []
for item in searchRes:
# Return the first perfect match.
if item[check] == ton:
if item[check].strip('"') == ton:
# For titles do additional check for kind
if kind != 'tt' or title_kind == item['kind']:
return item.getID()
elif kind == 'tt':
title_only_matches.append(item.getID())
# imdbpy2sql.py could detected wrong type, so if no title and kind
# matches found - collect all results with title only match
# Return list of IDs if multiple matches (can happen when searching
# titles with no title_kind specified)
# Example: DB: Band of Brothers "tv series" vs "tv mini-series"
if title_only_matches:
if len(title_only_matches) == 1:
return title_only_matches[0]
else:
return title_only_matches
return None
def title2imdbID(self, title):
def title2imdbID(self, title, kind=None):
"""Translate a movie title (in the plain text data files format)
to an imdbID.
Try an Exact Primary Title search on IMDb;
return None if it's unable to get the imdbID."""
return self._searchIMDb('tt', title)
return None if it's unable to get the imdbID;
Always specify kind: movie, tv series, video game etc. or search can
return list of IDs if multiple matches found
"""
return self._searchIMDb('tt', title, kind)
def name2imdbID(self, name):
"""Translate a person name in an imdbID.
Try an Exact Primary Name search on IMDb;
return None if it's unable to get the imdbID."""
return self._searchIMDb('tt', name)
return self._searchIMDb('nm', name)
def character2imdbID(self, name):
"""Translate a character name in an imdbID.
@@ -896,7 +916,8 @@ class IMDbBase:
imdbID = aSystem.get_imdbMovieID(mop.movieID)
else:
imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
ptdf=1))
ptdf=0, appendKind=False),
mop['kind'])
elif isinstance(mop, Person.Person):
if mop.personID is not None:
imdbID = aSystem.get_imdbPersonID(mop.personID)
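A hedged usage sketch of the kind-aware lookup added above. The 'http' access system is the new default from the config hunk below; the title and kind values are illustrative, and the returned ID (or list of IDs on an ambiguous match) depends on what IMDb serves at query time:

from imdb import IMDb

ia = IMDb('http')
# Passing the kind avoids ambiguous matches such as "tv series" vs
# "tv mini-series" (the Band of Brothers case noted in the hunk above);
# without it, title2imdbID may return a list of candidate IDs.
imdb_id = ia.title2imdbID('Band of Brothers', 'tv mini series')
if isinstance(imdb_id, list):
    print 'ambiguous match:', imdb_id
else:
    print 'imdbID:', imdb_id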
@@ -29,7 +29,7 @@
[imdbpy]
## Default.
accessSystem = mobile
accessSystem = http
## Optional (options common to every data access system):
# Activate adult searches (on, by default).
@@ -37,7 +37,7 @@ accessSystem = mobile
# Number of results for searches (20 by default).
#results = 20
# Re-raise all caught exceptions (off, by default).
reraiseExceptions = on
#reraiseExceptions = off
## Optional (options common to http and mobile data access systems):
# Proxy used to access the network. If it requires authentication,
@@ -69,7 +69,7 @@ reraiseExceptions = on
## Set the threshold for logging messages.
# Can be one of "debug", "info", "warning", "error", "critical" (default:
# "warning").
loggingLevel = info
#loggingLevel = debug
## Path to a configuration file for the logging facility;
# see: http://docs.python.org/library/logging.html#configuring-logging
@@ -64,8 +64,10 @@ LANG_ARTICLES = {
'English': ('the', 'a', 'an'),
'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
'uno'),
'Spanish': ('la', 'le', 'el', 'les', 'un', 'los', 'una', 'uno', 'unos',
'unas'),
'Spanish': ('la', 'lo', 'el', 'las', 'un', 'los', 'una', 'al', 'del',
'unos', 'unas', 'uno'),
'French': ('le', "l'", 'la', 'les', 'un', 'une', 'des', 'au', 'du', '\xc3\xa0 la',
'de la', 'aux'),
'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
'Turkish': (), # Some languages doesn't have articles.
}
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python
"""
generatepot.py script.
lib/imdb/locale/imdbpy-ar.po (1303 lines changed; diff suppressed because it is too large)
lib/imdb/locale/imdbpy-bg.po (1303 lines changed; diff suppressed because it is too large)
lib/imdb/locale/imdbpy-de.po (1303 lines changed; diff suppressed because it is too large)
lib/imdb/locale/imdbpy-es.po (1304 lines changed; diff suppressed because it is too large)
lib/imdb/locale/imdbpy-fr.po (1304 lines changed; diff suppressed because it is too large)
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
"""Generate binary message catalog from textual translation description.
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python
"""
rebuildmo.py script.
@ -104,15 +104,24 @@ PY_VERSION = sys.version_info[:2]
|
||||
# The cookies for the "adult" search.
|
||||
# Please don't mess with these account.
|
||||
# Old 'IMDbPY' account.
|
||||
_old_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
|
||||
_old_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
|
||||
# New 'IMDbPYweb' account.
|
||||
_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
|
||||
_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
|
||||
_IMDbPY_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
|
||||
_IMDbPY_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
|
||||
# 'imdbpy2010' account.
|
||||
_imdbpy2010_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
|
||||
_imdbpy2010_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
|
||||
# old 'IMDbPYweb' account.
|
||||
_old_IMDbPYweb_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
|
||||
_old_IMDbPYweb_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
|
||||
# old 'IMDbPYweb' account values (as of 2012-12-30)
|
||||
_IMDbPYweb_cookie_id = 'BCYjtpb46Go0cMHAMewWZEauhwqPL7ASCPpPVNutu6BuayHZd0U6Dk3UAqVlEM8DHLDsSr02RGQn5ff3245-R4A130NAWJ_5yqXx7X-zJey8vQM8JKdv3rTUSEJznJQlojUW1Bije-Q0FXAixs4I0sePWhd_tA41i-9AF2q3lPmaksram6ilMhN9i3IPESW1PMbk'
|
||||
_IMDbPYweb_cookie_uu = 'BCYttQjEMc-NyUdFUGxThidAnBo7wwalEzj4un9uzf2XoEjtqDhNfrH7bOSuwlRkMEQ11SNyTajl-b9Q-21m4HwYu0e3jXZrjYLXLYzFkrEroCDyUREqaTwPJPSjGtFmvlaVBZEZmsWpaxe18DT5KiygKyGPZKH78Xu4im6ba-Sd31WvbXHzP8KGXPpGjhhVuv7Dcv314HCWkE832Srf9ya-Uv0FdGAmYyLbIAXuxnvpYQd6oZ8-CYkSGLIqcKWdrf5S'
|
||||
# 'IMDbPY2013' account
|
||||
_IMDbPY2013_cookie_id = 'BCYmoyqSm2WglmOzG-SrFWSvVpxsTZOB0qEOOqmAwCBxCbaNgKOxd0DTKzUvt7t04Pya5gV2tUrpDmYxrc1Dr54DQj2UXI7QI35__M5-HI2KrbOI3PjDz6M-_U3HG8topMfN64R24tmBixoZhMYXVaEc556lf0Z4gQNJVYRANXvwytP5v1lpfeToRlu9aVJwN4kT'
|
||||
_IMDbPY2013_cookie_uu = 'BCYquDS8Y2i8R1pJxS4nB77YrhjHHXeOea2Xl9KtZvE6RZKVfMvzTGU4Vl5-yxfPbgRSiFJasyf-hhPuVvXyaHlfeBjNlbFT8hz2HzFFkQ_SxKxq05J51gi7Fv4SaAws1M-i7zmQ1TRunfJqCVIYqPwIs2NO7s4_YDH2ZoISVGLgca8OY2K58HychOZB1oRWHVeAJNhLJMrCWJBuGRLCNnQK5X9tA0dPPntr2Ussy0ouul-N1GQz-8y5vda3JJ_C6xkwmHcA6JrOdOFO_HqMWjVSXuxGEdrXC919JM9H0vooVvKeVgAEJnTh2GiVlUJUoH3c'
|
||||
|
||||
# imdbpy2010 account.
|
||||
#_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
|
||||
#_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
|
||||
# Currently used account.
|
||||
_cookie_id = _IMDbPY2013_cookie_id
|
||||
_cookie_uu = _IMDbPY2013_cookie_uu
|
||||
|
||||
|
||||
class _FakeURLOpener(object):
|
||||
@ -141,9 +150,10 @@ class IMDbURLopener(FancyURLopener):
|
||||
for header in ('User-Agent', 'User-agent', 'user-agent'):
|
||||
self.del_header(header)
|
||||
self.set_header('User-Agent', 'Mozilla/5.0')
|
||||
self.set_header('Accept-Language', 'en-us,en;q=0.5')
|
||||
# XXX: This class is used also to perform "Exact Primary
|
||||
# [Title|Name]" searches, and so by default the cookie is set.
|
||||
c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu)
|
||||
c_header = 'uu=%s; id=%s' % (_cookie_uu, _cookie_id)
|
||||
self.set_header('Cookie', c_header)
|
||||
|
||||
def get_proxy(self):
|
||||
@ -199,12 +209,11 @@ class IMDbURLopener(FancyURLopener):
|
||||
server_encode = uopener.info().getparam('charset')
|
||||
# Otherwise, look at the content-type HTML meta tag.
|
||||
if server_encode is None and content:
|
||||
first_bytes = content[:512]
|
||||
begin_h = first_bytes.find('text/html; charset=')
|
||||
begin_h = content.find('text/html; charset=')
|
||||
if begin_h != -1:
|
||||
end_h = first_bytes[19+begin_h:].find('"')
|
||||
end_h = content[19+begin_h:].find('"')
|
||||
if end_h != -1:
|
||||
server_encode = first_bytes[19+begin_h:19+begin_h+end_h]
|
||||
server_encode = content[19+begin_h:19+begin_h+end_h]
|
||||
if server_encode:
|
||||
try:
|
||||
if lookup(server_encode):
|
||||
@ -455,16 +464,16 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||
results is the maximum number of results to be retrieved."""
|
||||
if isinstance(ton, unicode):
|
||||
try:
|
||||
ton = ton.encode('iso8859-1')
|
||||
ton = ton.encode('utf-8')
|
||||
except Exception, e:
|
||||
try:
|
||||
ton = ton.encode('utf-8')
|
||||
ton = ton.encode('iso8859-1')
|
||||
except Exception, e:
|
||||
pass
|
||||
##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
|
||||
params = 'q=%s;s=%s;mx=%s' % (quote_plus(ton), kind, str(results))
|
||||
params = 'q=%s&s=%s&mx=%s' % (quote_plus(ton), kind, str(results))
|
||||
if kind == 'ep':
|
||||
params = params.replace('s=ep;', 's=tt;ttype=ep;', 1)
|
||||
params = params.replace('s=ep&', 's=tt&ttype=ep&', 1)
|
||||
cont = self._retrieve(self.urls['find'] % params)
|
||||
#print 'URL:', imdbURL_find % params
|
||||
if cont.find('Your search returned more than') == -1 or \
|
||||
@ -472,7 +481,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||
return cont
|
||||
# The retrieved page contains no results, because too many
|
||||
# titles or names contain the string we're looking for.
|
||||
params = 'q=%s;ls=%s;lm=0' % (quote_plus(ton), kind)
|
||||
params = 'q=%s&ls=%s&lm=0' % (quote_plus(ton), kind)
|
||||
size = 131072 + results * 512
|
||||
return self._retrieve(self.urls['find'] % params, size=size)
|
||||
|
||||
@ -587,6 +596,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||
cont = self._retrieve(self.urls['movie_main'] % movieID + 'recommendations')
|
||||
return self.mProxy.rec_parser.parse(cont)
|
||||
|
||||
def get_movie_critic_reviews(self, movieID):
|
||||
cont = self._retrieve(self.urls['movie_main'] % movieID + 'criticreviews')
|
||||
return self.mProxy.criticrev_parser.parse(cont)
|
||||
|
||||
def get_movie_external_reviews(self, movieID):
|
||||
cont = self._retrieve(self.urls['movie_main'] % movieID + 'externalreviews')
|
||||
return self.mProxy.externalrev_parser.parse(cont)
|
||||
@ -754,7 +767,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||
return self.pProxy.person_keywords_parser.parse(cont)
|
||||
|
||||
def _search_character(self, name, results):
|
||||
cont = self._get_search_content('char', name, results)
|
||||
cont = self._get_search_content('ch', name, results)
|
||||
return self.scProxy.search_character_parser.parse(cont, results=results)['data']
|
||||
|
||||
def get_character_main(self, characterID):
|
||||
|
@ -9,7 +9,7 @@ pages would be:
|
||||
plot summary: http://akas.imdb.com/title/tt0094226/plotsummary
|
||||
...and so on...
|
||||
|
||||
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -531,9 +531,6 @@ class DOMHTMLMovieParser(DOMParserBase):
|
||||
def _process_plotsummary(x):
|
||||
"""Process a plot (contributed by Rdian06)."""
|
||||
xauthor = x.get('author')
|
||||
if xauthor:
|
||||
xauthor = xauthor.replace('{', '<').replace('}', '>').replace('(',
|
||||
'<').replace(')', '>').strip()
|
||||
xplot = x.get('plot', u'').strip()
|
||||
if xauthor:
|
||||
xplot += u'::%s' % xauthor
|
||||
@ -555,17 +552,20 @@ class DOMHTMLPlotParser(DOMParserBase):
|
||||
# Notice that recently IMDb started to put the email of the
|
||||
# author only in the link, that we're not collecting, here.
|
||||
extractors = [Extractor(label='plot',
|
||||
path="//p[@class='plotpar']",
|
||||
path="//ul[@class='zebraList']//p",
|
||||
attrs=Attribute(key='plot',
|
||||
multi=True,
|
||||
path={'plot': './text()',
|
||||
'author': './i/a/text()'},
|
||||
path={'plot': './text()[1]',
|
||||
'author': './span/em/a/text()'},
|
||||
postprocess=_process_plotsummary))]
|
||||
|
||||
|
||||
def _process_award(x):
|
||||
award = {}
|
||||
award['award'] = x.get('award').strip()
|
||||
_award = x.get('award')
|
||||
if _award is not None:
|
||||
_award = _award.strip()
|
||||
award['award'] = _award
|
||||
if not award['award']:
|
||||
return {}
|
||||
award['year'] = x.get('year').strip()
|
||||
@ -709,10 +709,16 @@ class DOMHTMLTaglinesParser(DOMParserBase):
|
||||
result = tparser.parse(taglines_html_string)
|
||||
"""
|
||||
extractors = [Extractor(label='taglines',
|
||||
path="//div[@id='tn15content']/p",
|
||||
attrs=Attribute(key='taglines', multi=True,
|
||||
path='//*[contains(concat(" ", normalize-space(@class), " "), " soda ")]',
|
||||
attrs=Attribute(key='taglines',
|
||||
multi=True,
|
||||
path="./text()"))]
|
||||
|
||||
def postprocess_data(self, data):
|
||||
if 'taglines' in data:
|
||||
data['taglines'] = [tagline.strip() for tagline in data['taglines']]
|
||||
return data
|
||||
|
||||
|
||||
class DOMHTMLKeywordsParser(DOMParserBase):
|
||||
"""Parser for the "keywords" page of a given movie.
|
||||
@ -785,9 +791,9 @@ class DOMHTMLSoundtrackParser(DOMHTMLAlternateVersionsParser):
|
||||
]
|
||||
|
||||
def postprocess_data(self, data):
|
||||
if 'soundtrack' in data:
|
||||
if 'alternate versions' in data:
|
||||
nd = []
|
||||
for x in data['soundtrack']:
|
||||
for x in data['alternate versions']:
|
||||
ds = x.split('\n')
|
||||
title = ds[0]
|
||||
if title[0] == '"' and title[-1] == '"':
|
||||
@ -846,6 +852,13 @@ class DOMHTMLCrazyCreditsParser(DOMParserBase):
|
||||
x.replace('\n', ' ').replace(' ', ' ')))]
|
||||
|
||||
|
||||
def _process_goof(x):
|
||||
if x['spoiler_category']:
|
||||
return x['spoiler_category'].strip() + ': SPOILER: ' + x['text'].strip()
|
||||
else:
|
||||
return x['category'].strip() + ': ' + x['text'].strip()
|
||||
|
||||
|
||||
class DOMHTMLGoofsParser(DOMParserBase):
|
||||
"""Parser for the "goofs" page of a given movie.
|
||||
The page should be provided as a string, as taken from
|
||||
@ -858,9 +871,14 @@ class DOMHTMLGoofsParser(DOMParserBase):
|
||||
"""
|
||||
_defGetRefs = True
|
||||
|
||||
extractors = [Extractor(label='goofs', path="//ul[@class='trivia']/li",
|
||||
attrs=Attribute(key='goofs', multi=True, path=".//text()",
|
||||
postprocess=lambda x: (x or u'').strip()))]
|
||||
extractors = [Extractor(label='goofs', path="//div[@class='soda odd']",
|
||||
attrs=Attribute(key='goofs', multi=True,
|
||||
path={
|
||||
'text':"./text()",
|
||||
'category':'./preceding-sibling::h4[1]/text()',
|
||||
'spoiler_category': './h4/text()'
|
||||
},
|
||||
postprocess=_process_goof))]
|
||||
|
||||
|
||||
class DOMHTMLQuotesParser(DOMParserBase):
|
||||
@ -876,9 +894,16 @@ class DOMHTMLQuotesParser(DOMParserBase):
|
||||
_defGetRefs = True
|
||||
|
||||
extractors = [
|
||||
Extractor(label='quotes',
|
||||
path="//div[@class='_imdbpy']",
|
||||
attrs=Attribute(key='quotes',
|
||||
Extractor(label='quotes_odd',
|
||||
path="//div[@class='quote soda odd']",
|
||||
attrs=Attribute(key='quotes_odd',
|
||||
multi=True,
|
||||
path=".//text()",
|
||||
postprocess=lambda x: x.strip().replace(' \n',
|
||||
'::').replace('::\n', '::').replace('\n', ' '))),
|
||||
Extractor(label='quotes_even',
|
||||
path="//div[@class='quote soda even']",
|
||||
attrs=Attribute(key='quotes_even',
|
||||
multi=True,
|
||||
path=".//text()",
|
||||
postprocess=lambda x: x.strip().replace(' \n',
|
||||
@ -886,27 +911,23 @@ class DOMHTMLQuotesParser(DOMParserBase):
|
||||
]
|
||||
|
||||
preprocessors = [
|
||||
(re.compile('(<a name="?qt[0-9]{7}"?></a>)', re.I),
|
||||
r'\1<div class="_imdbpy">'),
|
||||
(re.compile('<hr width="30%">', re.I), '</div>'),
|
||||
(re.compile('<hr/>', re.I), '</div>'),
|
||||
(re.compile('<script.*?</script>', re.I|re.S), ''),
|
||||
# For BeautifulSoup.
|
||||
(re.compile('<!-- sid: t-channel : MIDDLE_CENTER -->', re.I), '</div>')
|
||||
(re.compile('<a href="#" class="hidesoda hidden">Hide options</a><br>', re.I), '')
|
||||
]
|
||||
|
||||
def preprocess_dom(self, dom):
|
||||
# Remove "link this quote" links.
|
||||
for qLink in self.xpath(dom, "//p[@class='linksoda']"):
|
||||
for qLink in self.xpath(dom, "//span[@class='linksoda']"):
|
||||
qLink.drop_tree()
|
||||
for qLink in self.xpath(dom, "//div[@class='sharesoda_pre']"):
|
||||
qLink.drop_tree()
|
||||
return dom
|
||||
|
||||
def postprocess_data(self, data):
|
||||
if 'quotes' not in data:
|
||||
quotes = data.get('quotes_odd', []) + data.get('quotes_even', [])
|
||||
if not quotes:
|
||||
return {}
|
||||
for idx, quote in enumerate(data['quotes']):
|
||||
data['quotes'][idx] = quote.split('::')
|
||||
return data
|
||||
quotes = [q.split('::') for q in quotes]
|
||||
return {'quotes': quotes}
|
||||
|
||||
|
||||
class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||
@ -920,13 +941,13 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||
result = rdparser.parse(releaseinfo_html_string)
|
||||
"""
|
||||
extractors = [Extractor(label='release dates',
|
||||
path="//th[@class='xxxx']/../../tr",
|
||||
path="//table[@id='release_dates']//tr",
|
||||
attrs=Attribute(key='release dates', multi=True,
|
||||
path={'country': ".//td[1]//text()",
|
||||
'date': ".//td[2]//text()",
|
||||
'notes': ".//td[3]//text()"})),
|
||||
Extractor(label='akas',
|
||||
path="//div[@class='_imdbpy_akas']/table/tr",
|
||||
path="//table[@id='akas']//tr",
|
||||
attrs=Attribute(key='akas', multi=True,
|
||||
path={'title': "./td[1]/text()",
|
||||
'countries': "./td[2]/text()"}))]
|
||||
@ -961,7 +982,7 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||
title = (aka.get('title') or '').strip()
|
||||
if not title:
|
||||
continue
|
||||
countries = (aka.get('countries') or '').split('/')
|
||||
countries = (aka.get('countries') or '').split(',')
|
||||
if not countries:
|
||||
nakas.append(title)
|
||||
else:
|
||||
@ -1135,6 +1156,27 @@ def _normalize_href(href):
|
||||
href = '%s%s' % (imdbURL_base, href)
|
||||
return href
|
||||
|
||||
class DOMHTMLCriticReviewsParser(DOMParserBase):
|
||||
"""Parser for the "critic reviews" pages of a given movie.
|
||||
The page should be provided as a string, as taken from
|
||||
the akas.imdb.com server. The final result will be a
|
||||
dictionary, with a key for every relevant section.
|
||||
|
||||
Example:
|
||||
osparser = DOMHTMLCriticReviewsParser()
|
||||
result = osparser.parse(officialsites_html_string)
|
||||
"""
|
||||
kind = 'critic reviews'
|
||||
|
||||
extractors = [
|
||||
Extractor(label='metascore',
|
||||
path="//div[@class='metascore_wrap']/div/span",
|
||||
attrs=Attribute(key='metascore',
|
||||
path=".//text()")),
|
||||
Extractor(label='metacritic url',
|
||||
path="//div[@class='article']/div[@class='see-more']/a",
|
||||
attrs=Attribute(key='metacritic url',
|
||||
path="./@href")) ]
|
||||
|
||||
class DOMHTMLOfficialsitesParser(DOMParserBase):
|
||||
"""Parser for the "official sites", "external reviews", "newsgroup
|
||||
@ -1471,6 +1513,14 @@ class DOMHTMLSeasonEpisodesParser(DOMParserBase):
|
||||
try: selected_season = int(selected_season)
|
||||
except: pass
|
||||
nd = {selected_season: {}}
|
||||
if 'episode -1' in data:
|
||||
counter = 1
|
||||
for episode in data['episode -1']:
|
||||
while 'episode %d' % counter in data:
|
||||
counter += 1
|
||||
k = 'episode %d' % counter
|
||||
data[k] = [episode]
|
||||
del data['episode -1']
|
||||
for episode_nr, episode in data.iteritems():
|
||||
if not (episode and episode[0] and
|
||||
episode_nr.startswith('episode ')):
|
||||
@ -1860,6 +1910,8 @@ _OBJECTS = {
|
||||
'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
|
||||
'ratings_parser': ((DOMHTMLRatingsParser,), None),
|
||||
'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
|
||||
'criticrev_parser': ((DOMHTMLCriticReviewsParser,),
|
||||
{'kind': 'critic reviews'}),
|
||||
'externalrev_parser': ((DOMHTMLOfficialsitesParser,),
|
||||
{'kind': 'external reviews'}),
|
||||
'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,),
|
||||
|
@ -8,7 +8,7 @@ E.g., for "Mel Gibson" the referred pages would be:
|
||||
biography: http://akas.imdb.com/name/nm0000154/bio
|
||||
...and so on...
|
||||
|
||||
Copyright 2004-20101 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -60,6 +60,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||
result = cparser.parse(categorized_html_string)
|
||||
"""
|
||||
_containsObjects = True
|
||||
_name_imdb_index = re.compile(r'\([IVXLCDM]+\)')
|
||||
|
||||
_birth_attrs = [Attribute(key='birth date',
|
||||
path='.//time[@itemprop="birthDate"]/@datetime'),
|
||||
@ -100,6 +101,10 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||
path=".//text()",
|
||||
postprocess=lambda x: analyze_name(x,
|
||||
canonical=1))),
|
||||
Extractor(label='name_index',
|
||||
path="//h1[@class='header']/span[1]",
|
||||
attrs=Attribute(key='name_index',
|
||||
path="./text()")),
|
||||
|
||||
Extractor(label='birth info',
|
||||
path="//div[h4='Born:']",
|
||||
@ -110,7 +115,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||
attrs=_death_attrs),
|
||||
|
||||
Extractor(label='headshot',
|
||||
path="//td[@id='img_primary']/a",
|
||||
path="//td[@id='img_primary']/div[@class='image']/a",
|
||||
attrs=Attribute(key='headshot',
|
||||
path="./img/@src")),
|
||||
|
||||
@ -152,6 +157,11 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||
for what in 'birth date', 'death date':
|
||||
if what in data and not data[what]:
|
||||
del data[what]
|
||||
name_index = (data.get('name_index') or '').strip()
|
||||
if name_index:
|
||||
if self._name_imdb_index.match(name_index):
|
||||
data['imdbIndex'] = name_index[1:-1]
|
||||
del data['name_index']
|
||||
# XXX: the code below is for backwards compatibility
|
||||
# probably could be removed
|
||||
for key in data.keys():
|
||||
@ -220,13 +230,13 @@ class DOMHTMLBioParser(DOMParserBase):
|
||||
attrs=Attribute(key='headshot',
|
||||
path="./img/@src")),
|
||||
Extractor(label='birth info',
|
||||
path="//div[h5='Date of Birth']",
|
||||
path="//table[@id='overviewTable']//td[text()='Date of Birth']/following-sibling::td[1]",
|
||||
attrs=_birth_attrs),
|
||||
Extractor(label='death info',
|
||||
path="//div[h5='Date of Death']",
|
||||
path="//table[@id='overviewTable']//td[text()='Date of Death']/following-sibling::td[1]",
|
||||
attrs=_death_attrs),
|
||||
Extractor(label='nick names',
|
||||
path="//div[h5='Nickname']",
|
||||
path="//table[@id='overviewTable']//td[text()='Nickenames']/following-sibling::td[1]",
|
||||
attrs=Attribute(key='nick names',
|
||||
path="./text()",
|
||||
joiner='|',
|
||||
@ -234,25 +244,25 @@ class DOMHTMLBioParser(DOMParserBase):
|
||||
'::(', 1) for n in x.split('|')
|
||||
if n.strip()])),
|
||||
Extractor(label='birth name',
|
||||
path="//div[h5='Birth Name']",
|
||||
path="//table[@id='overviewTable']//td[text()='Birth Name']/following-sibling::td[1]",
|
||||
attrs=Attribute(key='birth name',
|
||||
path="./text()",
|
||||
postprocess=lambda x: canonicalName(x.strip()))),
|
||||
Extractor(label='height',
|
||||
path="//div[h5='Height']",
|
||||
path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
|
||||
attrs=Attribute(key='height',
|
||||
path="./text()",
|
||||
postprocess=lambda x: x.strip())),
|
||||
Extractor(label='mini biography',
|
||||
path="//div[h5='Mini Biography']",
|
||||
path="//a[@name='mini_bio']/following-sibling::div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
|
||||
attrs=Attribute(key='mini biography',
|
||||
multi=True,
|
||||
path={
|
||||
'bio': "./p//text()",
|
||||
'by': "./b/following-sibling::a/text()"
|
||||
'bio': ".//text()",
|
||||
'by': ".//a[@name='ba']//text()"
|
||||
},
|
||||
postprocess=lambda x: "%s::%s" % \
|
||||
(x.get('bio').strip(),
|
||||
((x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
|
||||
(x.get('by') or u'').strip() or u'Anonymous'))),
|
||||
Extractor(label='spouse',
|
||||
path="//div[h5='Spouse']/table/tr",
|
||||
|
@ -5,9 +5,9 @@ This module provides the HTMLSearchCharacterParser class (and the
|
||||
search_character_parser instance), used to parse the results of a search
|
||||
for a given character.
|
||||
E.g., when searching for the name "Jesse James", the parsed page would be:
|
||||
http://akas.imdb.com/find?s=Characters;mx=20;q=Jesse+James
|
||||
http://akas.imdb.com/find?s=ch;mx=20;q=Jesse+James
|
||||
|
||||
Copyright 2007-2009 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2007-2012 Davide Alberani <da@erlug.linux.it>
|
||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -42,7 +42,7 @@ class DOMBasicCharacterParser(DOMBasicMovieParser):
|
||||
|
||||
class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
|
||||
_BaseParser = DOMBasicCharacterParser
|
||||
_notDirectHitTitle = '<title>imdb search'
|
||||
_notDirectHitTitle = '<title>find - imdb'
|
||||
_titleBuilder = lambda self, x: build_name(x, canonical=False)
|
||||
_linkPrefix = '/character/ch'
|
||||
|
||||
@ -57,7 +57,7 @@ class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
|
||||
{'name': x.get('name')}
|
||||
))]
|
||||
extractors = [Extractor(label='search',
|
||||
path="//td[3]/a[starts-with(@href, " \
|
||||
path="//td[@class='result_text']/a[starts-with(@href, " \
|
||||
"'/character/ch')]/..",
|
||||
attrs=_attrs)]
|
||||
|
||||
|
@ -7,7 +7,7 @@ for a given company.
|
||||
E.g., when searching for the name "Columbia Pictures", the parsed page would be:
|
||||
http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
|
||||
|
||||
Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2008-2012 Davide Alberani <da@erlug.linux.it>
|
||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -42,7 +42,7 @@ class DOMBasicCompanyParser(DOMBasicMovieParser):
|
||||
|
||||
class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
|
||||
_BaseParser = DOMBasicCompanyParser
|
||||
_notDirectHitTitle = '<title>imdb company'
|
||||
_notDirectHitTitle = '<title>find - imdb'
|
||||
_titleBuilder = lambda self, x: build_company_name(x)
|
||||
_linkPrefix = '/company/co'
|
||||
|
||||
@ -59,7 +59,7 @@ class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
|
||||
or u''), stripNotes=True)
|
||||
))]
|
||||
extractors = [Extractor(label='search',
|
||||
path="//td[3]/a[starts-with(@href, " \
|
||||
path="//td[@class='result_text']/a[starts-with(@href, " \
|
||||
"'/company/co')]/..",
|
||||
attrs=_attrs)]
|
||||
|
||||
|
@ -8,7 +8,7 @@ E.g., for when searching for the title "the passion", the parsed
|
||||
page would be:
|
||||
http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
|
||||
|
||||
Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -77,7 +77,7 @@ class DOMBasicMovieParser(DOMParserBase):
|
||||
def custom_analyze_title(title):
|
||||
"""Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
|
||||
# XXX: very crappy. :-(
|
||||
nt = title.split(' ')[0]
|
||||
nt = title.split(' aka ')[0]
|
||||
if nt:
|
||||
title = nt
|
||||
if not title:
|
||||
@ -92,7 +92,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||
"new search system" is used, for movies."""
|
||||
|
||||
_BaseParser = DOMBasicMovieParser
|
||||
_notDirectHitTitle = '<title>imdb title'
|
||||
_notDirectHitTitle = '<title>find - imdb</title>'
|
||||
_titleBuilder = lambda self, x: build_title(x)
|
||||
_linkPrefix = '/title/tt'
|
||||
|
||||
@ -101,8 +101,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||
path={
|
||||
'link': "./a[1]/@href",
|
||||
'info': ".//text()",
|
||||
#'akas': ".//div[@class='_imdbpyAKA']//text()"
|
||||
'akas': ".//p[@class='find-aka']//text()"
|
||||
'akas': "./i//text()"
|
||||
},
|
||||
postprocess=lambda x: (
|
||||
analyze_imdbid(x.get('link') or u''),
|
||||
@ -110,7 +109,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||
x.get('akas')
|
||||
))]
|
||||
extractors = [Extractor(label='search',
|
||||
path="//td[3]/a[starts-with(@href, '/title/tt')]/..",
|
||||
path="//td[@class='result_text']",
|
||||
attrs=_attrs)]
|
||||
def _init(self):
|
||||
self.url = u''
|
||||
@ -119,14 +118,11 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||
self.url = u''
|
||||
|
||||
def preprocess_string(self, html_string):
|
||||
if self._notDirectHitTitle in html_string[:1024].lower():
|
||||
if self._notDirectHitTitle in html_string[:10240].lower():
|
||||
if self._linkPrefix == '/title/tt':
|
||||
# Only for movies.
|
||||
# XXX (HTU): does this still apply?
|
||||
html_string = html_string.replace('(TV mini-series)', '(mini)')
|
||||
html_string = html_string.replace('<p class="find-aka">',
|
||||
'<p class="find-aka">::')
|
||||
#html_string = _reAKAStitles.sub(
|
||||
# r'<div class="_imdbpyAKA">\1::</div>\2', html_string)
|
||||
return html_string
|
||||
# Direct hit!
|
||||
dbme = self._BaseParser(useModule=self._useModule)
|
||||
@ -141,7 +137,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||
title = self._titleBuilder(res[0][1])
|
||||
if not (link and title): return u''
|
||||
link = link.replace('http://pro.imdb.com', '')
|
||||
new_html = '<td></td><td></td><td><a href="%s">%s</a></td>' % (link,
|
||||
new_html = '<td class="result_text"><a href="%s">%s</a></td>' % (link,
|
||||
title)
|
||||
return new_html
|
||||
|
||||
@ -161,11 +157,14 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||
if not datum[0] and datum[1]:
|
||||
continue
|
||||
if datum[2] is not None:
|
||||
akas = filter(None, datum[2].split('::'))
|
||||
#akas = filter(None, datum[2].split('::'))
|
||||
if self._linkPrefix == '/title/tt':
|
||||
akas = [a.replace('" - ', '::').rstrip() for a in akas]
|
||||
akas = [a.replace('aka "', '', 1).replace('aka "',
|
||||
'', 1).lstrip() for a in akas]
|
||||
# XXX (HTU): couldn't find a result with multiple akas
|
||||
aka = datum[2]
|
||||
akas = [aka[1:-1]] # remove the quotes
|
||||
#akas = [a.replace('" - ', '::').rstrip() for a in akas]
|
||||
#akas = [a.replace('aka "', '', 1).replace('aka "',
|
||||
#'', 1).lstrip() for a in akas]
|
||||
datum[1]['akas'] = akas
|
||||
data['data'][idx] = (datum[0], datum[1])
|
||||
else:
|
||||
|
@ -7,7 +7,7 @@ for a given person.
|
||||
E.g., when searching for the name "Mel Gibson", the parsed page would be:
|
||||
http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
|
||||
|
||||
Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -55,7 +55,7 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
|
||||
"""Parse the html page that the IMDb web server shows when the
|
||||
"new search system" is used, for persons."""
|
||||
_BaseParser = DOMBasicPersonParser
|
||||
_notDirectHitTitle = '<title>imdb name'
|
||||
_notDirectHitTitle = '<title>find - imdb'
|
||||
_titleBuilder = lambda self, x: build_name(x, canonical=True)
|
||||
_linkPrefix = '/name/nm'
|
||||
|
||||
@ -74,11 +74,11 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
|
||||
canonical=1), x.get('akas')
|
||||
))]
|
||||
extractors = [Extractor(label='search',
|
||||
path="//td[3]/a[starts-with(@href, '/name/nm')]/..",
|
||||
path="//td[@class='result_text']/a[starts-with(@href, '/name/nm')]/..",
|
||||
attrs=_attrs)]
|
||||
|
||||
def preprocess_string(self, html_string):
|
||||
if self._notDirectHitTitle in html_string[:1024].lower():
|
||||
if self._notDirectHitTitle in html_string[:10240].lower():
|
||||
html_string = _reAKASp.sub(
|
||||
r'\1<div class="_imdbpyAKA">\2::</div>\3',
|
||||
html_string)
|
||||
|
@ -340,7 +340,7 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
|
||||
title = title[:nidx].rstrip()
|
||||
if year:
|
||||
year = year.strip()
|
||||
if title[-1] == ')':
|
||||
if title[-1:] == ')':
|
||||
fpIdx = title.rfind('(')
|
||||
if fpIdx != -1:
|
||||
if notes: notes = '%s %s' % (title[fpIdx:], notes)
|
||||
|
@ -6,7 +6,7 @@ IMDb's data for mobile systems.
|
||||
the imdb.IMDb function will return an instance of this class when
|
||||
called with the 'accessSystem' argument set to "mobile".
|
||||
|
||||
Copyright 2005-2011 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -193,7 +193,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||
title)
|
||||
return res
|
||||
tl = title[0].lower()
|
||||
if not tl.startswith('imdb title'):
|
||||
if not tl.startswith('find - imdb'):
|
||||
# a direct hit!
|
||||
title = _unHtml(title[0])
|
||||
mid = None
|
||||
@ -211,7 +211,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||
# XXX: this results*3 prevents some recursion errors, but...
|
||||
# it's not exactly understandable (i.e.: why 'results' is
|
||||
# not enough to get all the results?)
|
||||
lis = _findBetween(cont, 'td valign="top">', '</td>',
|
||||
lis = _findBetween(cont, 'td class="result_text">', '</td>',
|
||||
maxRes=results*3)
|
||||
for li in lis:
|
||||
akas = re_makas.findall(li)
|
||||
@ -492,7 +492,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||
self._mobile_logger.warn('no title tag searching for name %s', name)
|
||||
return res
|
||||
nl = name[0].lower()
|
||||
if not nl.startswith('imdb name'):
|
||||
if not nl.startswith('find - imdb'):
|
||||
# a direct hit!
|
||||
name = _unHtml(name[0])
|
||||
name = name.replace('- Filmography by type' , '').strip()
|
||||
@ -506,7 +506,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||
return res
|
||||
res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
|
||||
else:
|
||||
lis = _findBetween(cont, 'td valign="top">', '</td>',
|
||||
lis = _findBetween(cont, 'td class="result_text">', '</td>',
|
||||
maxRes=results*3)
|
||||
for li in lis:
|
||||
akas = _findBetween(li, '<em>"', '"</em>')
|
||||
@ -771,7 +771,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||
return {'data': d}
|
||||
|
||||
def _search_character(self, name, results):
|
||||
cont = subXMLRefs(self._get_search_content('char', name, results))
|
||||
cont = subXMLRefs(self._get_search_content('ch', name, results))
|
||||
name = _findBetween(cont, '<title>', '</title>', maxRes=1)
|
||||
res = []
|
||||
if not name:
|
||||
@ -779,8 +779,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||
name)
|
||||
return res
|
||||
nl = name[0].lower()
|
||||
if not (nl.startswith('imdb search') or nl.startswith('imdb search') \
|
||||
or nl.startswith('imdb character')):
|
||||
if not nl.startswith('find - imdb'):
|
||||
# a direct hit!
|
||||
name = _unHtml(name[0]).replace('(Character)', '').strip()
|
||||
pid = None
|
||||
@ -793,12 +792,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||
return res
|
||||
res[:] = [(str(pid[0]), analyze_name(name))]
|
||||
else:
|
||||
sects = _findBetween(cont, '<b>Popular Characters</b>', '</table>',
|
||||
maxRes=results*3)
|
||||
sects += _findBetween(cont, '<b>Characters', '</table>',
|
||||
maxRes=results*3)
|
||||
for sect in sects:
|
||||
lis = _findBetween(sect, '<a href="/character/',
|
||||
lis = _findBetween(cont, '<td class="result_text"',
|
||||
['<small', '</td>', '<br'])
|
||||
for li in lis:
|
||||
li = '<%s' % li
|
||||
|
@ -7,7 +7,7 @@ the SQLObject _AND_ SQLAlchemy Object Relational Managers is available.
|
||||
the imdb.IMDb function will return an instance of this class when
|
||||
called with the 'accessSystem' argument set to "sql", "database" or "db".
|
||||
|
||||
Copyright 2005-2010 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -452,7 +452,12 @@ def get_movie_data(movieID, kindDict, fromAka=0, _table=None):
|
||||
else:
|
||||
if not fromAka: Table = Title
|
||||
else: Table = AkaTitle
|
||||
try:
|
||||
m = Table.get(movieID)
|
||||
except Exception, e:
|
||||
_aux_logger.warn('Unable to fetch information for movieID %s: %s', movieID, e)
|
||||
mdict = {}
|
||||
return mdict
|
||||
mdict = {'title': m.title, 'kind': kindDict[m.kindID],
|
||||
'year': m.productionYear, 'imdbIndex': m.imdbIndex,
|
||||
'season': m.seasonNr, 'episode': m.episodeNr}
|
||||
@ -825,14 +830,14 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||
imdbID = movie.imdbID
|
||||
if imdbID is not None: return '%07d' % imdbID
|
||||
m_dict = get_movie_data(movie.id, self._kind)
|
||||
titline = build_title(m_dict, ptdf=1)
|
||||
imdbID = self.title2imdbID(titline)
|
||||
titline = build_title(m_dict, ptdf=0)
|
||||
imdbID = self.title2imdbID(titline, m_dict['kind'])
|
||||
# If the imdbID was retrieved from the web and was not in the
|
||||
# database, update the database (ignoring errors, because it's
|
||||
# possibile that the current user has not update privileges).
|
||||
# There're times when I think I'm a genius; this one of
|
||||
# those times... <g>
|
||||
if imdbID is not None:
|
||||
if imdbID is not None and not isinstance(imdbID, list):
|
||||
try: movie.imdbID = int(imdbID)
|
||||
except: pass
|
||||
return imdbID
|
||||
@ -847,9 +852,9 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||
imdbID = person.imdbID
|
||||
if imdbID is not None: return '%07d' % imdbID
|
||||
n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
|
||||
namline = build_name(n_dict, canonical=1)
|
||||
namline = build_name(n_dict, canonical=False)
|
||||
imdbID = self.name2imdbID(namline)
|
||||
if imdbID is not None:
|
||||
if imdbID is not None and not isinstance(imdbID, list):
|
||||
try: person.imdbID = int(imdbID)
|
||||
except: pass
|
||||
return imdbID
|
||||
@ -864,9 +869,9 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||
imdbID = character.imdbID
|
||||
if imdbID is not None: return '%07d' % imdbID
|
||||
n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
|
||||
namline = build_name(n_dict, canonical=1)
|
||||
namline = build_name(n_dict, canonical=False)
|
||||
imdbID = self.character2imdbID(namline)
|
||||
if imdbID is not None:
|
||||
if imdbID is not None and not isinstance(imdbID, list):
|
||||
try: character.imdbID = int(imdbID)
|
||||
except: pass
|
||||
return imdbID
|
||||
@ -883,7 +888,7 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||
n_dict = {'name': company.name, 'country': company.countryCode}
|
||||
namline = build_company_name(n_dict)
|
||||
imdbID = self.company2imdbID(namline)
|
||||
if imdbID is not None:
|
||||
if imdbID is not None and not isinstance(imdbID, list):
|
||||
try: company.imdbID = int(imdbID)
|
||||
except: pass
|
||||
return imdbID
|
||||
@ -1116,6 +1121,7 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||
if mlinks:
|
||||
for ml in mlinks:
|
||||
lmovieData = get_movie_data(ml[0], self._kind)
|
||||
if lmovieData:
|
||||
m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
|
||||
ml[0] = m
|
||||
res['connections'] = {}
|
||||
|
@ -466,6 +466,7 @@ class _AlchemyConnection(object):
|
||||
|
||||
def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||
"""Set connection for every table."""
|
||||
params = {'encoding': encoding}
|
||||
# FIXME: why on earth MySQL requires an additional parameter,
|
||||
# is well beyond my understanding...
|
||||
if uri.startswith('mysql'):
|
||||
@ -474,7 +475,11 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||
else:
|
||||
uri += '?'
|
||||
uri += 'charset=%s' % encoding
|
||||
params = {'encoding': encoding}
|
||||
|
||||
# On some server configurations, we will need to explictly enable
|
||||
# loading data from local files
|
||||
params['local_infile'] = 1
|
||||
|
||||
if debug:
|
||||
params['echo'] = True
|
||||
if uri.startswith('ibm_db'):
|
||||
|
Binary file not shown.
@ -182,6 +182,10 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||
kw['use_unicode'] = 1
|
||||
#kw['sqlobject_encoding'] = encoding
|
||||
kw['charset'] = encoding
|
||||
|
||||
# On some server configurations, we will need to explictly enable
|
||||
# loading data from local files
|
||||
kw['local_infile'] = 1
|
||||
conn = connectionForURI(uri, **kw)
|
||||
conn.debug = debug
|
||||
# XXX: doesn't work and a work-around was put in imdbpy2sql.py;
|
||||
|
@ -3,7 +3,7 @@ utils module (imdb package).
|
||||
|
||||
This module provides basic utilities for the imdb package.
|
||||
|
||||
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
|
||||
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||
2009 H. Turgut Uyar <uyar@tekir.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -189,10 +189,9 @@ _unicodeArticles = linguistics.toUnicode(_articles)
|
||||
articlesDicts = linguistics.articlesDictsForLang(None)
|
||||
spArticles = linguistics.spArticlesForLang(None)
|
||||
|
||||
def canonicalTitle(title, lang=None):
|
||||
def canonicalTitle(title, lang=None, imdbIndex=None):
|
||||
"""Return the title in the canonic format 'Movie Title, The';
|
||||
beware that it doesn't handle long imdb titles, but only the
|
||||
title portion, without year[/imdbIndex] or special markup.
|
||||
beware that it doesn't handle long imdb titles.
|
||||
The 'lang' argument can be used to specify the language of the title.
|
||||
"""
|
||||
isUnicode = isinstance(title, unicode)
|
||||
@ -203,15 +202,19 @@ def canonicalTitle(title, lang=None):
|
||||
except IndexError:
|
||||
pass
|
||||
if isUnicode:
|
||||
_format = u'%s, %s'
|
||||
_format = u'%s%s, %s'
|
||||
else:
|
||||
_format = '%s, %s'
|
||||
_format = '%s%s, %s'
|
||||
ltitle = title.lower()
|
||||
if imdbIndex:
|
||||
imdbIndex = ' (%s)' % imdbIndex
|
||||
else:
|
||||
imdbIndex = ''
|
||||
spArticles = linguistics.spArticlesForLang(lang)
|
||||
for article in spArticles[isUnicode]:
|
||||
if ltitle.startswith(article):
|
||||
lart = len(article)
|
||||
title = _format % (title[lart:], title[:lart])
|
||||
title = _format % (title[lart:], imdbIndex, title[:lart])
|
||||
if article[-1] == ' ':
|
||||
title = title[:-1]
|
||||
break
|
||||
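A quick sketch of what the imdbIndex-aware canonical form above produces; the titles are illustrative and the import path assumes the utils module of the imdb package:

from imdb.utils import canonicalTitle

print canonicalTitle('The Avengers')                  # Avengers, The
print canonicalTitle('The Avengers', imdbIndex='II')  # Avengers (II), The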
@ -383,18 +386,42 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
|
||||
if title.endswith('(TV)'):
|
||||
kind = u'tv movie'
|
||||
title = title[:-4].rstrip()
|
||||
elif title.endswith('(TV Movie)'):
|
||||
kind = u'tv movie'
|
||||
title = title[:-10].rstrip()
|
||||
elif title.endswith('(V)'):
|
||||
kind = u'video movie'
|
||||
title = title[:-3].rstrip()
|
||||
elif title.endswith('(video)'):
|
||||
elif title.lower().endswith('(video)'):
|
||||
kind = u'video movie'
|
||||
title = title[:-7].rstrip()
|
||||
elif title.endswith('(TV Short)'):
|
||||
kind = u'tv short'
|
||||
title = title[:-10].rstrip()
|
||||
elif title.endswith('(TV Mini-Series)'):
|
||||
kind = u'tv mini series'
|
||||
title = title[:-16].rstrip()
|
||||
elif title.endswith('(mini)'):
|
||||
kind = u'tv mini series'
|
||||
title = title[:-6].rstrip()
|
||||
elif title.endswith('(VG)'):
|
||||
kind = u'video game'
|
||||
title = title[:-4].rstrip()
|
||||
elif title.endswith('(Video Game)'):
|
||||
kind = u'video game'
|
||||
title = title[:-12].rstrip()
|
||||
elif title.endswith('(TV Series)'):
|
||||
epindex = title.find('(TV Episode) - ')
|
||||
if epindex >= 0:
|
||||
# It's an episode of a series.
|
||||
kind = u'episode'
|
||||
series_info = analyze_title(title[epindex + 15:])
|
||||
result['episode of'] = series_info.get('title')
|
||||
result['series year'] = series_info.get('year')
|
||||
title = title[:epindex]
|
||||
else:
|
||||
kind = u'tv series'
|
||||
title = title[:-11].rstrip()
|
||||
# Search for the year and the optional imdbIndex (a roman number).
|
||||
yi = re_year_index.findall(title)
|
||||
if not yi:
|
||||
@ -430,9 +457,6 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
|
||||
if not kind:
|
||||
kind = u'tv series'
|
||||
title = title[1:-1].strip()
|
||||
elif title.endswith('(TV series)'):
|
||||
kind = u'tv series'
|
||||
title = title[:-11].rstrip()
|
||||
if not title:
|
||||
raise IMDbParserError('invalid title: "%s"' % original_t)
|
||||
if canonical is not None:
|
||||
@ -489,7 +513,7 @@ def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''):
|
||||
|
||||
def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||
canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
|
||||
_emptyString=u''):
|
||||
_emptyString=u'', appendKind=True):
|
||||
"""Given a dictionary that represents a "long" IMDb title,
|
||||
return a string.
|
||||
|
||||
@ -511,6 +535,11 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||
doYear = 0
|
||||
if ptdf:
|
||||
doYear = 1
|
||||
# XXX: for results coming from the new search page.
|
||||
if not isinstance(episode_of, (dict, _Container)):
|
||||
episode_of = {'title': episode_of, 'kind': 'tv series'}
|
||||
if 'series year' in title_dict:
|
||||
episode_of['year'] = title_dict['series year']
|
||||
pre_title = build_title(episode_of, canonical=canonicalSeries,
|
||||
ptdf=0, _doYear=doYear,
|
||||
_emptyString=_emptyString)
|
||||
@ -545,12 +574,14 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||
episode_title += '.%s' % episode
|
||||
episode_title += ')'
|
||||
episode_title = '{%s}' % episode_title
|
||||
return '%s %s' % (pre_title, episode_title)
|
||||
return _emptyString + '%s %s' % (_emptyString + pre_title,
|
||||
_emptyString + episode_title)
|
||||
title = title_dict.get('title', '')
|
||||
imdbIndex = title_dict.get('imdbIndex', '')
|
||||
if not title: return _emptyString
|
||||
if canonical is not None:
|
||||
if canonical:
|
||||
title = canonicalTitle(title, lang=lang)
|
||||
title = canonicalTitle(title, lang=lang, imdbIndex=imdbIndex)
|
||||
else:
|
||||
title = normalizeTitle(title, lang=lang)
|
||||
if pre_title:
|
||||
@ -558,15 +589,20 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||
if kind in (u'tv series', u'tv mini series'):
|
||||
title = '"%s"' % title
|
||||
if _doYear:
|
||||
imdbIndex = title_dict.get('imdbIndex')
|
||||
year = title_dict.get('year') or u'????'
|
||||
year = title_dict.get('year') or '????'
|
||||
if isinstance(_emptyString, str):
|
||||
year = str(year)
|
||||
imdbIndex = title_dict.get('imdbIndex')
|
||||
if not ptdf:
|
||||
if imdbIndex and (canonical is None or canonical):
|
||||
title += ' (%s)' % imdbIndex
|
||||
title += ' (%s)' % year
|
||||
else:
|
||||
title += ' (%s' % year
|
||||
if imdbIndex:
|
||||
if imdbIndex and (canonical is None or canonical):
|
||||
title += '/%s' % imdbIndex
|
||||
title += ')'
|
||||
if kind:
|
||||
if appendKind and kind:
|
||||
if kind == 'tv movie':
|
||||
title += ' (TV)'
|
||||
elif kind == 'video movie':
|
||||
|
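And a similar sketch for the new appendKind switch on build_title shown above (same assumed import path; the expected outputs in the comments are approximate):

from imdb.utils import build_title

movie = {'title': 'Band of Brothers', 'kind': 'tv mini series', 'year': 2001}
print build_title(movie)                    # "Band of Brothers" (2001) (mini)
print build_title(movie, appendKind=False)  # "Band of Brothers" (2001)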
@ -11,6 +11,7 @@ __author__ = "dbr/Ben"
|
||||
__version__ = "1.9"
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import getpass
|
||||
import StringIO
|
||||
@ -18,8 +19,10 @@ import tempfile
|
||||
import warnings
|
||||
import logging
|
||||
import zipfile
|
||||
import datetime as dt
|
||||
import requests
|
||||
import cachecontrol
|
||||
import xmltodict
|
||||
|
||||
try:
|
||||
import xml.etree.cElementTree as ElementTree
|
||||
@ -31,6 +34,7 @@ try:
|
||||
except ImportError:
|
||||
gzip = None
|
||||
|
||||
from lib.dateutil.parser import parse
|
||||
from cachecontrol import caches
|
||||
|
||||
from tvdb_ui import BaseUI, ConsoleUI
|
||||
@ -560,35 +564,60 @@ class Tvdb:
|
||||
except requests.Timeout, e:
|
||||
raise tvdb_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
||||
|
||||
if 'application/zip' in resp.headers.get("Content-Type", '') and resp.ok:
|
||||
def process(path, key, value):
|
||||
key = key.lower()
|
||||
|
||||
# clean up value and do type changes
|
||||
if value:
|
||||
try:
|
||||
# convert to integer if needed
|
||||
if value.isdigit():
|
||||
value = int(value)
|
||||
except:
|
||||
pass
|
||||
|
||||
if key in ['banner', 'fanart', 'poster']:
|
||||
value = self.config['url_artworkPrefix'] % (value)
|
||||
else:
|
||||
value = self._cleanData(value)
|
||||
|
||||
try:
|
||||
if key == 'firstaired' and value in "0000-00-00":
|
||||
new_value = str(dt.date.fromordinal(1))
|
||||
new_value = re.sub("([-]0{2}){1,}", "", new_value)
|
||||
fixDate = parse(new_value, fuzzy=True).date()
|
||||
value = fixDate.strftime("%Y-%m-%d")
|
||||
elif key == 'firstaired':
|
||||
value = parse(value, fuzzy=True).date()
|
||||
value = value.strftime("%Y-%m-%d")
|
||||
except:
|
||||
pass
|
||||
|
||||
value = self._cleanData(value)
|
||||
return (key, value)
|
||||
|
||||
if resp.ok:
|
||||
if 'application/zip' in resp.headers.get("Content-Type", ''):
|
||||
try:
|
||||
# TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
|
||||
log().debug("We recived a zip file unpacking now ...")
|
||||
zipdata = StringIO.StringIO()
|
||||
zipdata.write(resp.content)
|
||||
myzipfile = zipfile.ZipFile(zipdata)
|
||||
return myzipfile.read('%s.xml' % language)
|
||||
return xmltodict.parse(myzipfile.read('%s.xml' % language), postprocessor=process)
|
||||
except zipfile.BadZipfile:
|
||||
raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
|
||||
|
||||
return resp.content if resp.ok else None
|
||||
else:
|
||||
return xmltodict.parse(resp.text.strip(), postprocessor=process)
|
||||
|
||||
def _getetsrc(self, url, params=None, language=None):
"""Loads a URL using caching, returns an ElementTree of the source
"""
src = self._loadUrl(url, params=params, language=language)
try:
# TVDB doesn't sanitize \r (CR) from user input in some fields,
# remove it to avoid errors. Change from SickBeard, from will14m
return ElementTree.fromstring(src.rstrip("\r")) if src else None
except SyntaxError:
src = self._loadUrl(url, params=params, language=language)
try:
return ElementTree.fromstring(src.rstrip("\r")) if src else None
except SyntaxError, exceptionmsg:
errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % (
exceptionmsg
)
src = [src[item] for item in src][0]
except:
errormsg = "There was an error with the XML retrieved from thetvdb.com:"

if self.config['cache_enabled']:
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
@ -599,6 +628,8 @@ class Tvdb:
errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
raise tvdb_error(errormsg)

return src

def _setItem(self, sid, seas, ep, attrib, value):
"""Creates a new episode, creating Show(), Season() and
Episode()s as required. Called by _getShowData to populate show
@ -649,9 +680,8 @@ class Tvdb:
log().debug("Searching for show %s" % series)
self.config['params_getSeries']['seriesname'] = series
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
allSeries = list(dict((s.tag.lower(), s.text) for s in x.getchildren()) for x in seriesEt)

return allSeries
return [seriesEt[item] for item in seriesEt][0]

def _getSeries(self, series):
"""This searches TheTVDB.com for the series name,
@ -798,24 +828,13 @@ class Tvdb:
self.config['url_seriesInfo'] % (sid, getShowInLanguage)
)

if seriesInfoEt is None: return False
for curInfo in seriesInfoEt.findall("Series")[0]:
tag = curInfo.tag.lower()
value = curInfo.text

if tag == 'seriesname' and value is None:
# check and make sure we have data to process and that it contains a series name
if seriesInfoEt is None or 'seriesname' not in seriesInfoEt['series']:
return False

if value is not None:
if tag == 'id':
value = int(value)
for k, v in seriesInfoEt['series'].items():
self._setShowData(sid, k, v)

if tag in ['banner', 'fanart', 'poster']:
value = self.config['url_artworkPrefix'] % (value)
else:
value = self._cleanData(value)

self._setShowData(sid, tag, value)
if seriesSearch:
return True

@ -837,63 +856,40 @@ class Tvdb:

epsEt = self._getetsrc(url, language=language)

for cur_ep in epsEt.findall("Episode"):

for cur_ep in epsEt["episode"]:
if self.config['dvdorder']:
log().debug('Using DVD ordering.')
use_dvd = cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None
use_dvd = cur_ep['dvd_season'] != None and cur_ep['dvd_episodenumber'] != None
else:
use_dvd = False

if use_dvd:
elem_seasnum, elem_epno = cur_ep.find('DVD_season'), cur_ep.find('DVD_episodenumber')
seasnum, epno = cur_ep['dvd_season'], cur_ep['dvd_episodenumber']
else:
elem_seasnum, elem_epno = cur_ep.find('SeasonNumber'), cur_ep.find('EpisodeNumber')

if elem_seasnum is None or elem_epno is None:
seasnum, epno = cur_ep['seasonnumber'], cur_ep['episodenumber']

if seasnum is None or epno is None:
log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
elem_seasnum, elem_epno))
log().debug(
" ".join(
"%r is %r" % (child.tag, child.text) for child in cur_ep.getchildren()))
# TODO: Should this happen?
seasnum, epno))
continue # Skip to next episode

# float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
seas_no = int(float(elem_seasnum.text))
ep_no = int(float(elem_epno.text))
seas_no = int(float(seasnum))
ep_no = int(float(epno))

useDVD = False
for k,v in cur_ep.items():
k = k.lower()

if (self.config['dvdorder']):
log().debug('DVD Order? Yes')
useDVD = (cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None)
if v is not None:
if k == 'id':
v = int(v)

if k == 'filename':
v = self.config['url_artworkPrefix'] % (v)
else:
log().debug('DVD Order? No')
v = self._cleanData(v)

if (useDVD):
log().debug('Use DVD Order? Yes')
seas_no = int(cur_ep.find('DVD_season').text)
ep_no = int(float(cur_ep.find('DVD_episodenumber').text))
else:
log().debug('Use DVD Order? No')
seas_no = int(cur_ep.find('SeasonNumber').text)
ep_no = int(cur_ep.find('EpisodeNumber').text)

for cur_item in cur_ep.getchildren():
tag = cur_item.tag.lower()
value = cur_item.text
if value is not None:
if tag == 'id':
value = int(value)

if tag == 'filename':
value = self.config['url_artworkPrefix'] % (value)
else:
value = self._cleanData(value)
self._setItem(sid, seas_no, ep_no, tag, value)
self._setItem(sid, seas_no, ep_no, k, v)

return True
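
For reference, a minimal sketch (not part of the commit) of the dict shape the xmltodict-based parsing above works with: episode fields arrive as plain dictionary keys instead of ElementTree nodes. The sample XML and the lowercase_keys postprocessor are illustrative stand-ins for the real TheTVDB response and the process() callback.

import xmltodict

sample = """
<Data>
  <Episode>
    <id>123</id>
    <SeasonNumber>1</SeasonNumber>
    <EpisodeNumber>4</EpisodeNumber>
    <EpisodeName>Walkabout</EpisodeName>
  </Episode>
</Data>
"""

def lowercase_keys(path, key, value):
    # minimal stand-in for the process() postprocessor used by Tvdb._loadUrl
    return key.lower(), value

doc = xmltodict.parse(sample, postprocessor=lowercase_keys)
episode = doc['data']['episode']
print episode['seasonnumber'], episode['episodename']   # 1 Walkabout
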
@ -1,4 +1,4 @@
#!/usr/bin/env python2
# !/usr/bin/env python2
#encoding:utf-8
#author:echel0n
#project:tvrage_api
@ -24,6 +24,7 @@ import logging
import datetime as dt
import requests
import cachecontrol
import xmltodict

try:
import xml.etree.cElementTree as ElementTree
@ -37,9 +38,11 @@ from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)


def log():
return logging.getLogger("tvrage_api")


def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
"""Retry calling the decorated function using an exponential backoff.

@ -83,6 +86,7 @@ def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):

return deco_retry


class ShowContainer(dict):
"""Simple dict that holds a series of Show instances
"""
@ -112,6 +116,7 @@ class ShowContainer(dict):
class Show(dict):
"""Holds a dict of seasons, and show data.
"""

def __init__(self):
dict.__init__(self)
self.data = {}
@ -157,7 +162,7 @@ class Show(dict):
raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
return ret

def search(self, term = None, key = None):
def search(self, term=None, key=None):
"""
Search all episodes in show. Can search all data, or a specific key (for
example, episodename)
@ -173,7 +178,7 @@ class Show(dict):
"""
results = []
for cur_season in self.values():
searchresult = cur_season.search(term = term, key = key)
searchresult = cur_season.search(term=term, key=key)
if len(searchresult) != 0:
results.extend(searchresult)

@ -181,7 +186,7 @@ class Show(dict):


class Season(dict):
def __init__(self, show = None):
def __init__(self, show=None):
"""The show attribute points to the parent show
"""
self.show = show
@ -202,13 +207,13 @@ class Season(dict):
else:
return dict.__getitem__(self, episode_number)

def search(self, term = None, key = None):
def search(self, term=None, key=None):
"""Search all episodes in season, returns a list of matching Episode
instances.
"""
results = []
for ep in self.values():
searchresult = ep.search(term = term, key = key)
searchresult = ep.search(term=term, key=key)
if searchresult is not None:
results.append(
searchresult
@ -217,7 +222,7 @@ class Season(dict):


class Episode(dict):
def __init__(self, season = None):
def __init__(self, season=None):
"""The season attribute points to the parent season
"""
self.season = season
@ -242,7 +247,7 @@ class Episode(dict):
except KeyError:
raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))

def search(self, term = None, key = None):
def search(self, term=None, key=None):
"""Search episode data for term, if it matches, return the Episode (self).
The key parameter can be used to limit the search to a specific element,
for example, episodename.
@ -258,22 +263,24 @@ class Episode(dict):
if key is not None and cur_key != key:
# Do not search this key
continue
if cur_value.find( unicode(term).lower() ) > -1:
if cur_value.find(unicode(term).lower()) > -1:
return self


class TVRage:
"""Create easy-to-use interface to name of season/episode name"""

def __init__(self,
interactive = False,
select_first = False,
debug = False,
cache = True,
banners = False,
actors = False,
custom_ui = None,
language = None,
search_all_languages = False,
apikey = None,
interactive=False,
select_first=False,
debug=False,
cache=True,
banners=False,
actors=False,
custom_ui=None,
language=None,
search_all_languages=False,
apikey=None,
forceConnect=False,
useZip=False,
dvdorder=False):
@ -331,8 +338,8 @@ class TVRage:
# Hard-coded here as it is realtively static, and saves another HTTP request, as
# recommended on http://tvrage.com/wiki/index.php/API:languages.xml
self.config['valid_languages'] = [
"da", "fi", "nl", "de", "it", "es", "fr","pl", "hu","el","tr",
"ru","he","ja","pt","zh","cs","sl", "hr","ko","en","sv","no"
"da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
"ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv", "no"
]

# tvrage.com should be based around numeric language codes,
@ -390,9 +397,9 @@ class TVRage:

# get response from TVRage
if self.config['cache_enabled']:
resp = self.sess.get(url, cache_auto=True, params=params)
resp = self.sess.get(url.strip(), cache_auto=True, params=params)
else:
resp = requests.get(url, params=params)
resp = requests.get(url.strip(), params=params)

except requests.HTTPError, e:
raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
@ -403,12 +410,8 @@ class TVRage:
except requests.Timeout, e:
raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))

return resp.content if resp.ok else None

def _getetsrc(self, url, params=None):
"""Loads a URL using caching, returns an ElementTree of the source
"""
reDict = {
def remap_keys(path, key, value):
name_map = {
'showid': 'id',
'showname': 'seriesname',
'name': 'seriesname',
@ -422,54 +425,59 @@ class TVRage:
'title': 'episodename',
'airdate': 'firstaired',
'screencap': 'filename',
'seasonnum': 'episodenumber',
'seasonnum': 'episodenumber'
}

robj = re.compile('|'.join(reDict.keys()))
src = self._loadUrl(url, params)
try:
# TVRAGE doesn't sanitize \r (CR) from user input in some fields,
# remove it to avoid errors. Change from SickBeard, from will14m
xml = ElementTree.fromstring(src.rstrip("\r"))
tree = ElementTree.ElementTree(xml)
for elm in tree.findall('.//*'):
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
key = name_map[key.lower()]
except (ValueError, TypeError, KeyError):
key.lower()

# clean up value and do type changes
if value:
if isinstance(value, dict):
if key == 'network':
value = value['#text']
if key == 'genre':
value = value['genre']
if not isinstance(value, list):
value = [value]
value = '|' + '|'.join(value) + '|'

if elm.tag in 'firstaired':
try:
if elm.text in "0000-00-00":
elm.text = str(dt.date.fromordinal(1))
elm.text = re.sub("([-]0{2}){1,}", "", elm.text)
fixDate = parse(elm.text, fuzzy=True).date()
elm.text = fixDate.strftime("%Y-%m-%d")
# convert to integer if needed
if value.isdigit():
value = int(value)
except:
pass
return ElementTree.fromstring(ElementTree.tostring(xml))
except SyntaxError:
src = self._loadUrl(url, params)
try:
xml = ElementTree.fromstring(src.rstrip("\r"))
tree = ElementTree.ElementTree(xml)
for elm in tree.findall('.//*'):
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)

if elm.tag in 'firstaired' and elm.text:
if elm.text == "0000-00-00":
elm.text = str(dt.date.fromordinal(1))
try:
#month = strptime(match.group('air_month')[:3],'%b').tm_mon
#day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
#dtStr = '%s/%s/%s' % (year, month, day)

fixDate = parse(elm.text, fuzzy=True)
elm.text = fixDate.strftime("%Y-%m-%d")
if key == 'firstaired' and value in "0000-00-00":
new_value = str(dt.date.fromordinal(1))
new_value = re.sub("([-]0{2}){1,}", "", new_value)
fixDate = parse(new_value, fuzzy=True).date()
value = fixDate.strftime("%Y-%m-%d")
elif key == 'firstaired':
value = parse(value, fuzzy=True).date()
value = value.strftime("%Y-%m-%d")
except:
pass
return ElementTree.fromstring(ElementTree.tostring(xml))
except SyntaxError, exceptionmsg:
errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % (
exceptionmsg
)

value = self._cleanData(value)
return (key, value)

if resp.ok:
return xmltodict.parse(resp.text.strip(), postprocessor=remap_keys)

def _getetsrc(self, url, params=None):
"""Loads a URL using caching, returns an ElementTree of the source
"""

try:
src = self._loadUrl(url, params)
src = [src[item] for item in src][0]
except:
errormsg = "There was an error with the XML retrieved from tvrage.com"

if self.config['cache_enabled']:
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
@ -479,6 +487,8 @@ class TVRage:
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
raise tvrage_error(errormsg)

return src

def _setItem(self, sid, seas, ep, attrib, value):
"""Creates a new episode, creating Show(), Season() and
Episode()s as required. Called by _getShowData to populate show
@ -497,9 +507,9 @@ class TVRage:
if sid not in self.shows:
self.shows[sid] = Show()
if seas not in self.shows[sid]:
self.shows[sid][seas] = Season(show = self.shows[sid])
self.shows[sid][seas] = Season(show=self.shows[sid])
if ep not in self.shows[sid][seas]:
self.shows[sid][seas][ep] = Episode(season = self.shows[sid][seas])
self.shows[sid][seas][ep] = Episode(season=self.shows[sid][seas])
self.shows[sid][seas][ep][attrib] = value

def _setShowData(self, sid, key, value):
@ -529,9 +539,8 @@ class TVRage:
log().debug("Searching for show %s" % series)
self.config['params_getSeries']['show'] = series
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
allSeries = list(dict((s.tag.lower(),s.text) for s in x.getchildren()) for x in seriesEt)

return allSeries
return [seriesEt[item] for item in seriesEt][0]

def _getSeries(self, series):
"""This searches tvrage.com for the series name,
@ -547,10 +556,10 @@ class TVRage:

if self.config['custom_ui'] is not None:
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
ui = self.config['custom_ui'](config = self.config)
ui = self.config['custom_ui'](config=self.config)
else:
log().debug('Auto-selecting first search result using BaseUI')
ui = BaseUI(config = self.config)
ui = BaseUI(config=self.config)

return ui.selectSeries(allSeries)

@ -568,60 +577,47 @@ class TVRage:
self.config['params_seriesInfo']
)

if seriesInfoEt is None: return False
for curInfo in seriesInfoEt:
tag = curInfo.tag.lower()
value = curInfo.text

if tag == 'seriesname' and value is None:
# check and make sure we have data to process and that it contains a series name
if seriesInfoEt is None or 'seriesname' not in seriesInfoEt:
return False

if tag == 'id':
value = int(value)
for k, v in seriesInfoEt.items():
self._setShowData(sid, k, v)

if value is not None:
value = self._cleanData(value)

self._setShowData(sid, tag, value)
if seriesSearch: return True

try:
# Parse genre data
log().debug('Getting genres of %s' % (sid))
for genre in seriesInfoEt.find('genres'):
tag = genre.tag.lower()

value = genre.text
if value is not None:
value = self._cleanData(value)

self._setShowData(sid, tag, value)
except Exception:
log().debug('No genres for %s' % (sid))
# series search ends here
if seriesSearch:
return True

# Parse episode data
log().debug('Getting all episodes of %s' % (sid))

self.config['params_epInfo']['sid'] = sid
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
for cur_list in epsEt.findall("Episodelist"):
for cur_seas in cur_list:
try:
seas_no = int(cur_seas.attrib['no'])
for cur_ep in cur_seas:
ep_no = int(cur_ep.find('episodenumber').text)

for season in epsEt['Episodelist']['Season']:
episodes = season['episode']
if not isinstance(episodes, list):
episodes = [episodes]

for episode in episodes:
seas_no = int(season['@no'])
ep_no = int(episode['episodenumber'])
self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
for cur_item in cur_ep:
tag = cur_item.tag.lower()

value = cur_item.text
if value is not None:
if tag == 'id':
value = int(value)
for k,v in episode.items():
try:
k = k.lower()
if v is not None:
if k == 'link':
v = v.rsplit('/', 1)[1]
k = 'id'

value = self._cleanData(value)
if k == 'id':
v = int(v)

self._setItem(sid, seas_no, ep_no, tag, value)
v = self._cleanData(v)

self._setItem(sid, seas_no, ep_no, k, v)
except:
continue
return True
@ -632,7 +628,7 @@ class TVRage:
the correct SID.
"""
if name in self.corrections:
log().debug('Correcting %s to %s' % (name, self.corrections[name]) )
log().debug('Correcting %s to %s' % (name, self.corrections[name]))
return self.corrections[name]
else:
log().debug('Getting show %s' % (name))
@ -673,11 +669,13 @@ def main():
grabs an episode name interactively.
"""
import logging

logging.basicConfig(level=logging.DEBUG)

tvrage_instance = TVRage(cache=False)
print tvrage_instance['Lost']['seriesname']
print tvrage_instance['Lost'][1][4]['episodename']


if __name__ == '__main__':
main()
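
A hedged sketch (not part of the diff) of the key remapping that remap_keys above performs while xmltodict parses a TVRage response, including how a nested genre block can be collapsed into the pipe-delimited string SickRage displays. The name_map subset and the sample XML are illustrative, not the full TVRage mapping.

import xmltodict

tvrage_xml = """
<show>
  <showid>2930</showid>
  <name>Buffy the Vampire Slayer</name>
  <genres>
    <genre>Action</genre>
    <genre>Drama</genre>
  </genres>
</show>
"""

# small illustrative subset of the mapping used above
name_map = {'showid': 'id', 'name': 'seriesname', 'airdate': 'firstaired'}

def remap(path, key, value):
    key = name_map.get(key.lower(), key.lower())
    # collapse <genres><genre>..</genre></genres> into '|Action|Drama|'
    if key == 'genres' and isinstance(value, dict):
        genres = value['genre']
        if not isinstance(genres, list):
            genres = [genres]
        value = '|' + '|'.join(genres) + '|'
    return key, value

show = xmltodict.parse(tvrage_xml, postprocessor=remap)['show']
print show['id'], show['seriesname']   # 2930 Buffy the Vampire Slayer
print show['genres']                   # |Action|Drama|
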
359
lib/xmltodict.py
Normal file
@ -0,0 +1,359 @@
#!/usr/bin/env python
"Makes working with XML feel like you are working with JSON"

from xml.parsers import expat
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
try: # pragma no cover
from cStringIO import StringIO
except ImportError: # pragma no cover
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
try: # pragma no cover
from collections import OrderedDict
except ImportError: # pragma no cover
try:
from ordereddict import OrderedDict
except ImportError:
OrderedDict = dict

try: # pragma no cover
_basestring = basestring
except NameError: # pragma no cover
_basestring = str
try: # pragma no cover
_unicode = unicode
except NameError: # pragma no cover
_unicode = str

__author__ = 'Martin Blech'
__version__ = '0.9.0'
__license__ = 'MIT'


class ParsingInterrupted(Exception):
pass


class _DictSAXHandler(object):
def __init__(self,
item_depth=0,
item_callback=lambda *args: True,
xml_attribs=True,
attr_prefix='@',
cdata_key='#text',
force_cdata=False,
cdata_separator='',
postprocessor=None,
dict_constructor=OrderedDict,
strip_whitespace=True,
namespace_separator=':',
namespaces=None):
self.path = []
self.stack = []
self.data = None
self.item = None
self.item_depth = item_depth
self.xml_attribs = xml_attribs
self.item_callback = item_callback
self.attr_prefix = attr_prefix
self.cdata_key = cdata_key
self.force_cdata = force_cdata
self.cdata_separator = cdata_separator
self.postprocessor = postprocessor
self.dict_constructor = dict_constructor
self.strip_whitespace = strip_whitespace
self.namespace_separator = namespace_separator
self.namespaces = namespaces

def _build_name(self, full_name):
if not self.namespaces:
return full_name
i = full_name.rfind(self.namespace_separator)
if i == -1:
return full_name
namespace, name = full_name[:i], full_name[i+1:]
short_namespace = self.namespaces.get(namespace, namespace)
if not short_namespace:
return name
else:
return self.namespace_separator.join((short_namespace, name))

def _attrs_to_dict(self, attrs):
if isinstance(attrs, dict):
return attrs
return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))

def startElement(self, full_name, attrs):
name = self._build_name(full_name)
attrs = self._attrs_to_dict(attrs)
self.path.append((name, attrs or None))
if len(self.path) > self.item_depth:
self.stack.append((self.item, self.data))
if self.xml_attribs:
attrs = self.dict_constructor(
(self.attr_prefix+key, value)
for (key, value) in attrs.items())
else:
attrs = None
self.item = attrs or None
self.data = None

def endElement(self, full_name):
name = self._build_name(full_name)
if len(self.path) == self.item_depth:
item = self.item
if item is None:
item = self.data
should_continue = self.item_callback(self.path, item)
if not should_continue:
raise ParsingInterrupted()
if len(self.stack):
item, data = self.item, self.data
self.item, self.data = self.stack.pop()
if self.strip_whitespace and data is not None:
data = data.strip() or None
if data and self.force_cdata and item is None:
item = self.dict_constructor()
if item is not None:
if data:
self.push_data(item, self.cdata_key, data)
self.item = self.push_data(self.item, name, item)
else:
self.item = self.push_data(self.item, name, data)
else:
self.item = self.data = None
self.path.pop()

def characters(self, data):
if not self.data:
self.data = data
else:
self.data += self.cdata_separator + data

def push_data(self, item, key, data):
if self.postprocessor is not None:
result = self.postprocessor(self.path, key, data)
if result is None:
return item
key, data = result
if item is None:
item = self.dict_constructor()
try:
value = item[key]
if isinstance(value, list):
value.append(data)
else:
item[key] = [value, data]
except KeyError:
item[key] = data
return item

def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
namespace_separator=':', **kwargs):
"""Parse the given XML input and convert it into a dictionary.

`xml_input` can either be a `string` or a file-like object.

If `xml_attribs` is `True`, element attributes are put in the dictionary
among regular child elements, using `@` as a prefix to avoid collisions. If
set to `False`, they are just ignored.

Simple example::

>>> import xmltodict
>>> doc = xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>
... \"\"\")
>>> doc['a']['@prop']
u'x'
>>> doc['a']['b']
[u'1', u'2']

If `item_depth` is `0`, the function returns a dictionary for the root
element (default behavior). Otherwise, it calls `item_callback` every time
an item at the specified depth is found and returns `None` in the end
(streaming mode).

The callback function receives two parameters: the `path` from the document
root to the item (name-attribs pairs), and the `item` (dict). If the
callback's return value is false-ish, parsing will be stopped with the
:class:`ParsingInterrupted` exception.

Streaming example::

>>> def handle(path, item):
... print 'path:%s item:%s' % (path, item)
... return True
...
>>> xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>\"\"\", item_depth=2, item_callback=handle)
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

The optional argument `postprocessor` is a function that takes `path`,
`key` and `value` as positional arguments and returns a new `(key, value)`
pair where both `key` and `value` may have changed. Usage example::

>>> def postprocessor(path, key, value):
... try:
... return key + ':int', int(value)
... except (ValueError, TypeError):
... return key, value
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
... postprocessor=postprocessor)
OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

You can pass an alternate version of `expat` (such as `defusedexpat`) by
using the `expat` parameter. E.g:

>>> import defusedexpat
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
OrderedDict([(u'a', u'hello')])

"""
handler = _DictSAXHandler(namespace_separator=namespace_separator,
**kwargs)
if isinstance(xml_input, _unicode):
if not encoding:
encoding = 'utf-8'
xml_input = xml_input.encode(encoding)
if not process_namespaces:
namespace_separator = None
parser = expat.ParserCreate(
encoding,
namespace_separator
)
try:
parser.ordered_attributes = True
except AttributeError:
# Jython's expat does not support ordered_attributes
pass
parser.StartElementHandler = handler.startElement
parser.EndElementHandler = handler.endElement
parser.CharacterDataHandler = handler.characters
parser.buffer_text = True
try:
parser.ParseFile(xml_input)
except (TypeError, AttributeError):
parser.Parse(xml_input, True)
return handler.item

def _emit(key, value, content_handler,
attr_prefix='@',
cdata_key='#text',
depth=0,
preprocessor=None,
pretty=False,
newl='\n',
indent='\t'):
if preprocessor is not None:
result = preprocessor(key, value)
if result is None:
return
key, value = result
if not isinstance(value, (list, tuple)):
value = [value]
if depth == 0 and len(value) > 1:
raise ValueError('document with multiple roots')
for v in value:
if v is None:
v = OrderedDict()
elif not isinstance(v, dict):
v = _unicode(v)
if isinstance(v, _basestring):
v = OrderedDict(((cdata_key, v),))
cdata = None
attrs = OrderedDict()
children = []
for ik, iv in v.items():
if ik == cdata_key:
cdata = iv
continue
if ik.startswith(attr_prefix):
attrs[ik[len(attr_prefix):]] = iv
continue
children.append((ik, iv))
if pretty:
content_handler.ignorableWhitespace(depth * indent)
content_handler.startElement(key, AttributesImpl(attrs))
if pretty and children:
content_handler.ignorableWhitespace(newl)
for child_key, child_value in children:
_emit(child_key, child_value, content_handler,
attr_prefix, cdata_key, depth+1, preprocessor,
pretty, newl, indent)
if cdata is not None:
content_handler.characters(cdata)
if pretty and children:
content_handler.ignorableWhitespace(depth * indent)
content_handler.endElement(key)
if pretty and depth:
content_handler.ignorableWhitespace(newl)

def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
**kwargs):
"""Emit an XML document for the given `input_dict` (reverse of `parse`).

The resulting XML document is returned as a string, but if `output` (a
file-like object) is specified, it is written there instead.

Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
as XML node attributes, whereas keys equal to `cdata_key`
(default=`'#text'`) are treated as character data.

The `pretty` parameter (default=`False`) enables pretty-printing. In this
mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
can be customized with the `newl` and `indent` parameters.

"""
((key, value),) = input_dict.items()
must_return = False
if output is None:
output = StringIO()
must_return = True
content_handler = XMLGenerator(output, encoding)
if full_document:
content_handler.startDocument()
_emit(key, value, content_handler, **kwargs)
if full_document:
content_handler.endDocument()
if must_return:
value = output.getvalue()
try: # pragma no cover
value = value.decode(encoding)
except AttributeError: # pragma no cover
pass
return value

if __name__ == '__main__': # pragma: no cover
import sys
import marshal

(item_depth,) = sys.argv[1:]
item_depth = int(item_depth)

def handle_item(path, item):
marshal.dump((path, item), sys.stdout)
return True

try:
root = parse(sys.stdin,
item_depth=item_depth,
item_callback=handle_item,
dict_constructor=dict)
if item_depth == 0:
handle_item([], root)
except KeyboardInterrupt:
pass
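
Since lib/xmltodict.py is new in this commit, a small round-trip sketch may help: parse() turns XML into a dict (attributes keyed with the '@' prefix, character data under '#text'), and unparse() reverses it. The series values below are made up for illustration.

import xmltodict

doc = xmltodict.parse('<series id="73739"><name>Lost</name></series>')
print doc['series']['@id']      # 73739
print doc['series']['name']     # Lost

xml = xmltodict.unparse({'series': {'@id': '73739', 'name': 'Lost'}})
print xml
# <?xml version="1.0" encoding="utf-8"?>
# <series id="73739"><name>Lost</name></series>
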
|
@ -782,15 +782,11 @@ class GenericMetadata():

# Try and get posters and fanart from TMDB
if image_url is None:
for show_name in set(allPossibleShowNames(show_obj)):
if image_type in ('poster', 'poster_thumb'):
image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
elif image_type == 'fanart':
image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)

if image_url:
break

if image_url:
image_data = metadata_helpers.getShowImage(image_url, which)
return image_data
@ -965,8 +961,6 @@ class GenericMetadata():
return (indexer_id, name, indexer)

def _retrieve_show_images_from_tmdb(self, show, backdrop=False, poster=False):
tmdb_id = None

# get TMDB configuration info
tmdb = TMDB(sickbeard.TMDB_API_KEY)
config = tmdb.Configuration()
@ -981,27 +975,14 @@ class GenericMetadata():

try:
search = tmdb.Search()
for result in search.collection({'query': show.name}) + search.tv({'query': show.name}):
tmdb_id = result['id']
external_ids = tmdb.TV(tmdb_id).external_ids()
if show.indexerid in [external_ids['tvdb_id'], external_ids['tvrage_id']]:
break
for show_name in set(allPossibleShowNames(show)):
for result in search.collection({'query': show_name})['results'] + search.tv({'query': show_name})['results']:
if backdrop and result['backdrop_path']:
return "{0}{1}{2}".format(base_url, max_size, result['backdrop_path'])
elif poster and result['poster_path']:
return "{0}{1}{2}".format(base_url, max_size, result['poster_path'])

if tmdb_id:
images = tmdb.Collections(tmdb_id).images()
if len(images) > 0:
# get backdrop urls
if backdrop:
rel_path = images['backdrops'][0]['file_path']
url = "{0}{1}{2}".format(base_url, max_size, rel_path)
return url

# get poster urls
if poster:
rel_path = images['posters'][0]['file_path']
url = "{0}{1}{2}".format(base_url, max_size, rel_path)
return url
except:
except Exception, e:
pass

logger.log(u"Could not find any posters or background for " + show.name, logger.DEBUG)
@ -829,7 +829,7 @@ class TVShow(object):
self.airs = myEp["airs_dayofweek"] + " " + myEp["airs_time"]

if getattr(myEp, 'firstaired', None) is not None:
self.startyear = int(myEp["firstaired"].split('-')[0])
self.startyear = int(str(myEp["firstaired"]).split('-')[0])

self.status = getattr(myEp, 'status', '')

@ -855,7 +855,6 @@ class TVShow(object):
i = imdb.IMDb()
imdbTv = i.get_movie(str(re.sub("[^0-9]", "", self.imdbid)))

test = imdbTv.keys()
for key in filter(lambda x: x.replace('_', ' ') in imdbTv.keys(), imdb_info.keys()):
# Store only the first value for string type
if type(imdb_info[key]) == type('') and type(imdbTv.get(key)) == type([]):
@ -1556,7 +1555,7 @@ class TVEpisode(object):
self.deleteEpisode()
return False

if myEp["absolute_number"] == None or myEp["absolute_number"] == "":
if getattr(myEp, 'absolute_number', None) is None:
logger.log(u"This episode (" + self.show.name + " - " + str(season) + "x" + str(
episode) + ") has no absolute number on " + sickbeard.indexerApi(
self.indexer).name
@ -1564,7 +1563,7 @@ class TVEpisode(object):
else:
logger.log(
str(self.show.indexerid) + ": The absolute_number for " + str(season) + "x" + str(episode) + " is : " +
myEp["absolute_number"], logger.DEBUG)
str(myEp["absolute_number"]), logger.DEBUG)
self.absolute_number = int(myEp["absolute_number"])

self.name = getattr(myEp, 'episodename', "")
@ -1603,6 +1602,7 @@ class TVEpisode(object):
u"The show dir is missing, not bothering to change the episode statuses since it'd probably be invalid")
return

if self.location:
logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)