Update imdbpy libs to v5.0
Fixed invalid indexer ID issues for TVRage shows. Fixed issues with fetching posters and backdrops for TVRage shows. The indexer APIs now convert XML responses straight into dict objects, improving overall API performance. Fixed TVRage shows not displaying genres properly.
This commit is contained in:
parent
764cf6e62e
commit
2dcd26e69c
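
The indexer change in this commit replaces manual ElementTree traversal with xmltodict, which parses the XML response straight into a dict. A minimal sketch of that pattern, assuming xmltodict is installed (the XML payload and values below are illustrative, not a real thetvdb.com response):

    import xmltodict

    def process(path, key, value):
        # Lowercase tag names and coerce digit strings to ints,
        # mirroring the postprocessor added to tvdb_api in this commit.
        key = key.lower()
        if value and value.isdigit():
            value = int(value)
        return key, value

    xml = '<Data><Series><id>71663</id><SeriesName>The Simpsons</SeriesName></Series></Data>'
    data = xmltodict.parse(xml, postprocessor=process)
    print(data['data']['series']['id'])          # 71663, as an int
    print(data['data']['series']['seriesname'])  # The Simpsons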
@@ -6,7 +6,7 @@ a person from the IMDb database.
 It can fetch data through different media (e.g.: the IMDb web pages,
 a SQL database, etc.)
 
-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2014 Davide Alberani <da@erlug.linux.it>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

@@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
 __all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
            'available_access_systems']
-__version__ = VERSION = '4.9'
+__version__ = VERSION = '5.0'
 
 # Import compatibility module (importing it is enough).
 import _compat

@@ -160,6 +160,7 @@ def IMDb(accessSystem=None, *arguments, **keywords):
         kwds.update(keywords)
         keywords = kwds
     except Exception, e:
+        import logging
         logging.getLogger('imdbpy').warn('Unable to read configuration' \
                                          ' file; complete error: %s' % e)
     # It just LOOKS LIKE a bad habit: we tried to read config

@@ -303,7 +304,7 @@ class IMDbBase:
     # http://akas.imdb.com/keyword/%s/
     imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
     # http://akas.imdb.com/chart/top
-    imdbURL_top250=imdbURL_base + 'chart/top',
+    imdbURL_top250=imdbURL_base + 'chart/top'
     # http://akas.imdb.com/chart/bottom
     imdbURL_bottom100=imdbURL_base + 'chart/bottom'
     # http://akas.imdb.com/find?%s

@@ -824,22 +825,23 @@ class IMDbBase:
         # subclass, somewhere under the imdb.parser package.
         raise NotImplementedError('override this method')
 
-    def _searchIMDb(self, kind, ton):
+    def _searchIMDb(self, kind, ton, title_kind=None):
         """Search the IMDb akas server for the given title or name."""
         # The Exact Primary search system has gone AWOL, so we resort
         # to the mobile search. :-/
         if not ton:
             return None
+        ton = ton.strip('"')
         aSystem = IMDb('mobile')
         if kind == 'tt':
             searchFunct = aSystem.search_movie
-            check = 'long imdb canonical title'
+            check = 'long imdb title'
         elif kind == 'nm':
             searchFunct = aSystem.search_person
-            check = 'long imdb canonical name'
+            check = 'long imdb name'
         elif kind == 'char':
             searchFunct = aSystem.search_character
-            check = 'long imdb canonical name'
+            check = 'long imdb name'
         elif kind == 'co':
             # XXX: are [COUNTRY] codes included in the results?
             searchFunct = aSystem.search_company

@@ -852,24 +854,42 @@ class IMDbBase:
         # exact match.
         if len(searchRes) == 1:
             return searchRes[0].getID()
+        title_only_matches = []
         for item in searchRes:
             # Return the first perfect match.
-            if item[check] == ton:
+            if item[check].strip('"') == ton:
+                # For titles do additional check for kind
+                if kind != 'tt' or title_kind == item['kind']:
                     return item.getID()
+                elif kind == 'tt':
+                    title_only_matches.append(item.getID())
+        # imdbpy2sql.py could detected wrong type, so if no title and kind
+        # matches found - collect all results with title only match
+        # Return list of IDs if multiple matches (can happen when searching
+        # titles with no title_kind specified)
+        # Example: DB: Band of Brothers "tv series" vs "tv mini-series"
+        if title_only_matches:
+            if len(title_only_matches) == 1:
+                return title_only_matches[0]
+            else:
+                return title_only_matches
         return None
 
-    def title2imdbID(self, title):
+    def title2imdbID(self, title, kind=None):
         """Translate a movie title (in the plain text data files format)
         to an imdbID.
         Try an Exact Primary Title search on IMDb;
-        return None if it's unable to get the imdbID."""
-        return self._searchIMDb('tt', title)
+        return None if it's unable to get the imdbID;
+        Always specify kind: movie, tv series, video game etc. or search can
+        return list of IDs if multiple matches found
+        """
+        return self._searchIMDb('tt', title, kind)
 
     def name2imdbID(self, name):
         """Translate a person name in an imdbID.
         Try an Exact Primary Name search on IMDb;
         return None if it's unable to get the imdbID."""
-        return self._searchIMDb('tt', name)
+        return self._searchIMDb('nm', name)
 
     def character2imdbID(self, name):
         """Translate a character name in an imdbID.

@@ -896,7 +916,8 @@ class IMDbBase:
                 imdbID = aSystem.get_imdbMovieID(mop.movieID)
             else:
                 imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
-                                                          ptdf=1))
+                                                          ptdf=0, appendKind=False),
+                                              mop['kind'])
         elif isinstance(mop, Person.Person):
             if mop.personID is not None:
                 imdbID = aSystem.get_imdbPersonID(mop.personID)
@@ -29,7 +29,7 @@
 
 [imdbpy]
 ## Default.
-accessSystem = mobile
+accessSystem = http
 
 ## Optional (options common to every data access system):
 # Activate adult searches (on, by default).

@@ -37,7 +37,7 @@ accessSystem = mobile
 # Number of results for searches (20 by default).
 #results = 20
 # Re-raise all caught exceptions (off, by default).
-reraiseExceptions = on
+#reraiseExceptions = off
 
 ## Optional (options common to http and mobile data access systems):
 # Proxy used to access the network. If it requires authentication,

@@ -69,7 +69,7 @@ reraiseExceptions = on
 ## Set the threshold for logging messages.
 # Can be one of "debug", "info", "warning", "error", "critical" (default:
 # "warning").
-loggingLevel = info
+#loggingLevel = debug
 
 ## Path to a configuration file for the logging facility;
 # see: http://docs.python.org/library/logging.html#configuring-logging
@@ -64,8 +64,10 @@ LANG_ARTICLES = {
     'English': ('the', 'a', 'an'),
     'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
                 'uno'),
-    'Spanish': ('la', 'le', 'el', 'les', 'un', 'los', 'una', 'uno', 'unos',
-                'unas'),
+    'Spanish': ('la', 'lo', 'el', 'las', 'un', 'los', 'una', 'al', 'del',
+                'unos', 'unas', 'uno'),
+    'French': ('le', "l'", 'la', 'les', 'un', 'une', 'des', 'au', 'du', '\xc3\xa0 la',
+               'de la', 'aux'),
     'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
     'Turkish': (), # Some languages doesn't have articles.
 }
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 """
 generatepot.py script.
 
File diff suppressed because it is too large (5 files)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 # -*- coding: iso-8859-1 -*-
 """Generate binary message catalog from textual translation description.
 
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 """
 rebuildmo.py script.
 
@@ -104,15 +104,24 @@ PY_VERSION = sys.version_info[:2]
 # The cookies for the "adult" search.
 # Please don't mess with these account.
 # Old 'IMDbPY' account.
-_old_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
-_old_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
-# New 'IMDbPYweb' account.
-_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
-_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
+_IMDbPY_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
+_IMDbPY_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
+# 'imdbpy2010' account.
+_imdbpy2010_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
+_imdbpy2010_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
+# old 'IMDbPYweb' account.
+_old_IMDbPYweb_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
+_old_IMDbPYweb_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
+# old 'IMDbPYweb' account values (as of 2012-12-30)
+_IMDbPYweb_cookie_id = 'BCYjtpb46Go0cMHAMewWZEauhwqPL7ASCPpPVNutu6BuayHZd0U6Dk3UAqVlEM8DHLDsSr02RGQn5ff3245-R4A130NAWJ_5yqXx7X-zJey8vQM8JKdv3rTUSEJznJQlojUW1Bije-Q0FXAixs4I0sePWhd_tA41i-9AF2q3lPmaksram6ilMhN9i3IPESW1PMbk'
+_IMDbPYweb_cookie_uu = 'BCYttQjEMc-NyUdFUGxThidAnBo7wwalEzj4un9uzf2XoEjtqDhNfrH7bOSuwlRkMEQ11SNyTajl-b9Q-21m4HwYu0e3jXZrjYLXLYzFkrEroCDyUREqaTwPJPSjGtFmvlaVBZEZmsWpaxe18DT5KiygKyGPZKH78Xu4im6ba-Sd31WvbXHzP8KGXPpGjhhVuv7Dcv314HCWkE832Srf9ya-Uv0FdGAmYyLbIAXuxnvpYQd6oZ8-CYkSGLIqcKWdrf5S'
+# 'IMDbPY2013' account
+_IMDbPY2013_cookie_id = 'BCYmoyqSm2WglmOzG-SrFWSvVpxsTZOB0qEOOqmAwCBxCbaNgKOxd0DTKzUvt7t04Pya5gV2tUrpDmYxrc1Dr54DQj2UXI7QI35__M5-HI2KrbOI3PjDz6M-_U3HG8topMfN64R24tmBixoZhMYXVaEc556lf0Z4gQNJVYRANXvwytP5v1lpfeToRlu9aVJwN4kT'
+_IMDbPY2013_cookie_uu = 'BCYquDS8Y2i8R1pJxS4nB77YrhjHHXeOea2Xl9KtZvE6RZKVfMvzTGU4Vl5-yxfPbgRSiFJasyf-hhPuVvXyaHlfeBjNlbFT8hz2HzFFkQ_SxKxq05J51gi7Fv4SaAws1M-i7zmQ1TRunfJqCVIYqPwIs2NO7s4_YDH2ZoISVGLgca8OY2K58HychOZB1oRWHVeAJNhLJMrCWJBuGRLCNnQK5X9tA0dPPntr2Ussy0ouul-N1GQz-8y5vda3JJ_C6xkwmHcA6JrOdOFO_HqMWjVSXuxGEdrXC919JM9H0vooVvKeVgAEJnTh2GiVlUJUoH3c'
 
-# imdbpy2010 account.
-#_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
-#_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
+# Currently used account.
+_cookie_id = _IMDbPY2013_cookie_id
+_cookie_uu = _IMDbPY2013_cookie_uu
 
 
 class _FakeURLOpener(object):
@@ -141,9 +150,10 @@ class IMDbURLopener(FancyURLopener):
         for header in ('User-Agent', 'User-agent', 'user-agent'):
             self.del_header(header)
         self.set_header('User-Agent', 'Mozilla/5.0')
+        self.set_header('Accept-Language', 'en-us,en;q=0.5')
         # XXX: This class is used also to perform "Exact Primary
         # [Title|Name]" searches, and so by default the cookie is set.
-        c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu)
+        c_header = 'uu=%s; id=%s' % (_cookie_uu, _cookie_id)
         self.set_header('Cookie', c_header)
 
     def get_proxy(self):
@@ -199,12 +209,11 @@ class IMDbURLopener(FancyURLopener):
             server_encode = uopener.info().getparam('charset')
         # Otherwise, look at the content-type HTML meta tag.
         if server_encode is None and content:
-            first_bytes = content[:512]
-            begin_h = first_bytes.find('text/html; charset=')
+            begin_h = content.find('text/html; charset=')
             if begin_h != -1:
-                end_h = first_bytes[19+begin_h:].find('"')
+                end_h = content[19+begin_h:].find('"')
                 if end_h != -1:
-                    server_encode = first_bytes[19+begin_h:19+begin_h+end_h]
+                    server_encode = content[19+begin_h:19+begin_h+end_h]
         if server_encode:
             try:
                 if lookup(server_encode):
@@ -455,16 +464,16 @@ class IMDbHTTPAccessSystem(IMDbBase):
         results is the maximum number of results to be retrieved."""
         if isinstance(ton, unicode):
             try:
-                ton = ton.encode('iso8859-1')
+                ton = ton.encode('utf-8')
             except Exception, e:
                 try:
-                    ton = ton.encode('utf-8')
+                    ton = ton.encode('iso8859-1')
                 except Exception, e:
                     pass
         ##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
-        params = 'q=%s;s=%s;mx=%s' % (quote_plus(ton), kind, str(results))
+        params = 'q=%s&s=%s&mx=%s' % (quote_plus(ton), kind, str(results))
         if kind == 'ep':
-            params = params.replace('s=ep;', 's=tt;ttype=ep;', 1)
+            params = params.replace('s=ep&', 's=tt&ttype=ep&', 1)
         cont = self._retrieve(self.urls['find'] % params)
         #print 'URL:', imdbURL_find % params
         if cont.find('Your search returned more than') == -1 or \
@@ -472,7 +481,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
             return cont
         # The retrieved page contains no results, because too many
         # titles or names contain the string we're looking for.
-        params = 'q=%s;ls=%s;lm=0' % (quote_plus(ton), kind)
+        params = 'q=%s&ls=%s&lm=0' % (quote_plus(ton), kind)
         size = 131072 + results * 512
         return self._retrieve(self.urls['find'] % params, size=size)
 
@@ -587,6 +596,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
         cont = self._retrieve(self.urls['movie_main'] % movieID + 'recommendations')
         return self.mProxy.rec_parser.parse(cont)
 
+    def get_movie_critic_reviews(self, movieID):
+        cont = self._retrieve(self.urls['movie_main'] % movieID + 'criticreviews')
+        return self.mProxy.criticrev_parser.parse(cont)
+
     def get_movie_external_reviews(self, movieID):
         cont = self._retrieve(self.urls['movie_main'] % movieID + 'externalreviews')
         return self.mProxy.externalrev_parser.parse(cont)
@@ -754,7 +767,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
         return self.pProxy.person_keywords_parser.parse(cont)
 
     def _search_character(self, name, results):
-        cont = self._get_search_content('char', name, results)
+        cont = self._get_search_content('ch', name, results)
         return self.scProxy.search_character_parser.parse(cont, results=results)['data']
 
     def get_character_main(self, characterID):
@@ -9,7 +9,7 @@ pages would be:
 plot summary: http://akas.imdb.com/title/tt0094226/plotsummary
 ...and so on...
 
-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -531,9 +531,6 @@ class DOMHTMLMovieParser(DOMParserBase):
 def _process_plotsummary(x):
     """Process a plot (contributed by Rdian06)."""
     xauthor = x.get('author')
-    if xauthor:
-        xauthor = xauthor.replace('{', '<').replace('}', '>').replace('(',
-                                  '<').replace(')', '>').strip()
     xplot = x.get('plot', u'').strip()
     if xauthor:
         xplot += u'::%s' % xauthor
@@ -555,17 +552,20 @@ class DOMHTMLPlotParser(DOMParserBase):
     # Notice that recently IMDb started to put the email of the
     # author only in the link, that we're not collecting, here.
     extractors = [Extractor(label='plot',
-                            path="//p[@class='plotpar']",
+                            path="//ul[@class='zebraList']//p",
                             attrs=Attribute(key='plot',
                                             multi=True,
-                                            path={'plot': './text()',
-                                                  'author': './i/a/text()'},
+                                            path={'plot': './text()[1]',
+                                                  'author': './span/em/a/text()'},
                                             postprocess=_process_plotsummary))]
 
 
 def _process_award(x):
     award = {}
-    award['award'] = x.get('award').strip()
+    _award = x.get('award')
+    if _award is not None:
+        _award = _award.strip()
+    award['award'] = _award
     if not award['award']:
         return {}
     award['year'] = x.get('year').strip()
@@ -709,10 +709,16 @@ class DOMHTMLTaglinesParser(DOMParserBase):
         result = tparser.parse(taglines_html_string)
     """
     extractors = [Extractor(label='taglines',
-                            path="//div[@id='tn15content']/p",
-                            attrs=Attribute(key='taglines', multi=True,
+                            path='//*[contains(concat(" ", normalize-space(@class), " "), " soda ")]',
+                            attrs=Attribute(key='taglines',
+                                            multi=True,
                                             path="./text()"))]
 
+    def postprocess_data(self, data):
+        if 'taglines' in data:
+            data['taglines'] = [tagline.strip() for tagline in data['taglines']]
+        return data
+
 
 class DOMHTMLKeywordsParser(DOMParserBase):
     """Parser for the "keywords" page of a given movie.
@@ -785,9 +791,9 @@ class DOMHTMLSoundtrackParser(DOMHTMLAlternateVersionsParser):
         ]
 
     def postprocess_data(self, data):
-        if 'soundtrack' in data:
+        if 'alternate versions' in data:
             nd = []
-            for x in data['soundtrack']:
+            for x in data['alternate versions']:
                 ds = x.split('\n')
                 title = ds[0]
                 if title[0] == '"' and title[-1] == '"':
@@ -846,6 +852,13 @@ class DOMHTMLCrazyCreditsParser(DOMParserBase):
                     x.replace('\n', ' ').replace('  ', ' ')))]
 
 
+def _process_goof(x):
+    if x['spoiler_category']:
+        return x['spoiler_category'].strip() + ': SPOILER: ' + x['text'].strip()
+    else:
+        return x['category'].strip() + ': ' + x['text'].strip()
+
+
 class DOMHTMLGoofsParser(DOMParserBase):
     """Parser for the "goofs" page of a given movie.
     The page should be provided as a string, as taken from
@@ -858,9 +871,14 @@ class DOMHTMLGoofsParser(DOMParserBase):
     """
     _defGetRefs = True
 
-    extractors = [Extractor(label='goofs', path="//ul[@class='trivia']/li",
-                            attrs=Attribute(key='goofs', multi=True, path=".//text()",
-                                            postprocess=lambda x: (x or u'').strip()))]
+    extractors = [Extractor(label='goofs', path="//div[@class='soda odd']",
+                            attrs=Attribute(key='goofs', multi=True,
+                                            path={
+                                                'text':"./text()",
+                                                'category':'./preceding-sibling::h4[1]/text()',
+                                                'spoiler_category': './h4/text()'
+                                            },
+                                            postprocess=_process_goof))]
 
 
 class DOMHTMLQuotesParser(DOMParserBase):
@@ -876,9 +894,16 @@ class DOMHTMLQuotesParser(DOMParserBase):
     _defGetRefs = True
 
     extractors = [
-        Extractor(label='quotes',
-                  path="//div[@class='_imdbpy']",
-                  attrs=Attribute(key='quotes',
+        Extractor(label='quotes_odd',
+                  path="//div[@class='quote soda odd']",
+                  attrs=Attribute(key='quotes_odd',
                                   multi=True,
                                   path=".//text()",
                                   postprocess=lambda x: x.strip().replace(' \n',
                                       '::').replace('::\n', '::').replace('\n', ' '))),
+        Extractor(label='quotes_even',
+                  path="//div[@class='quote soda even']",
+                  attrs=Attribute(key='quotes_even',
+                                  multi=True,
+                                  path=".//text()",
+                                  postprocess=lambda x: x.strip().replace(' \n',
@@ -886,27 +911,23 @@ class DOMHTMLQuotesParser(DOMParserBase):
     ]
 
     preprocessors = [
-        (re.compile('(<a name="?qt[0-9]{7}"?></a>)', re.I),
-         r'\1<div class="_imdbpy">'),
-        (re.compile('<hr width="30%">', re.I), '</div>'),
-        (re.compile('<hr/>', re.I), '</div>'),
         (re.compile('<script.*?</script>', re.I|re.S), ''),
-        # For BeautifulSoup.
-        (re.compile('<!-- sid: t-channel : MIDDLE_CENTER -->', re.I), '</div>')
+        (re.compile('<a href="#" class="hidesoda hidden">Hide options</a><br>', re.I), '')
     ]
 
     def preprocess_dom(self, dom):
         # Remove "link this quote" links.
-        for qLink in self.xpath(dom, "//p[@class='linksoda']"):
+        for qLink in self.xpath(dom, "//span[@class='linksoda']"):
             qLink.drop_tree()
+        for qLink in self.xpath(dom, "//div[@class='sharesoda_pre']"):
+            qLink.drop_tree()
         return dom
 
     def postprocess_data(self, data):
-        if 'quotes' not in data:
+        quotes = data.get('quotes_odd', []) + data.get('quotes_even', [])
+        if not quotes:
             return {}
-        for idx, quote in enumerate(data['quotes']):
-            data['quotes'][idx] = quote.split('::')
-        return data
+        quotes = [q.split('::') for q in quotes]
+        return {'quotes': quotes}
 
 
 class DOMHTMLReleaseinfoParser(DOMParserBase):
@@ -920,13 +941,13 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
         result = rdparser.parse(releaseinfo_html_string)
     """
     extractors = [Extractor(label='release dates',
-                            path="//th[@class='xxxx']/../../tr",
+                            path="//table[@id='release_dates']//tr",
                             attrs=Attribute(key='release dates', multi=True,
                                             path={'country': ".//td[1]//text()",
                                                   'date': ".//td[2]//text()",
                                                   'notes': ".//td[3]//text()"})),
                  Extractor(label='akas',
-                           path="//div[@class='_imdbpy_akas']/table/tr",
+                           path="//table[@id='akas']//tr",
                            attrs=Attribute(key='akas', multi=True,
                                            path={'title': "./td[1]/text()",
                                                  'countries': "./td[2]/text()"}))]
@@ -961,7 +982,7 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
             title = (aka.get('title') or '').strip()
             if not title:
                 continue
-            countries = (aka.get('countries') or '').split('/')
+            countries = (aka.get('countries') or '').split(',')
             if not countries:
                 nakas.append(title)
             else:
@@ -1135,6 +1156,27 @@ def _normalize_href(href):
         href = '%s%s' % (imdbURL_base, href)
     return href
 
+class DOMHTMLCriticReviewsParser(DOMParserBase):
+    """Parser for the "critic reviews" pages of a given movie.
+    The page should be provided as a string, as taken from
+    the akas.imdb.com server. The final result will be a
+    dictionary, with a key for every relevant section.
+
+    Example:
+        osparser = DOMHTMLCriticReviewsParser()
+        result = osparser.parse(officialsites_html_string)
+    """
+    kind = 'critic reviews'
+
+    extractors = [
+        Extractor(label='metascore',
+                  path="//div[@class='metascore_wrap']/div/span",
+                  attrs=Attribute(key='metascore',
+                                  path=".//text()")),
+        Extractor(label='metacritic url',
+                  path="//div[@class='article']/div[@class='see-more']/a",
+                  attrs=Attribute(key='metacritic url',
+                                  path="./@href")) ]
+
 class DOMHTMLOfficialsitesParser(DOMParserBase):
     """Parser for the "official sites", "external reviews", "newsgroup
@@ -1471,6 +1513,14 @@ class DOMHTMLSeasonEpisodesParser(DOMParserBase):
         try: selected_season = int(selected_season)
         except: pass
         nd = {selected_season: {}}
+        if 'episode -1' in data:
+            counter = 1
+            for episode in data['episode -1']:
+                while 'episode %d' % counter in data:
+                    counter += 1
+                k = 'episode %d' % counter
+                data[k] = [episode]
+            del data['episode -1']
         for episode_nr, episode in data.iteritems():
             if not (episode and episode[0] and
                     episode_nr.startswith('episode ')):
@@ -1860,6 +1910,8 @@ _OBJECTS = {
     'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
     'ratings_parser': ((DOMHTMLRatingsParser,), None),
     'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
+    'criticrev_parser': ((DOMHTMLCriticReviewsParser,),
+                         {'kind': 'critic reviews'}),
     'externalrev_parser': ((DOMHTMLOfficialsitesParser,),
                            {'kind': 'external reviews'}),
     'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,),
@@ -8,7 +8,7 @@ E.g., for "Mel Gibson" the referred pages would be:
 biography: http://akas.imdb.com/name/nm0000154/bio
 ...and so on...
 
-Copyright 2004-20101 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -60,6 +60,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
         result = cparser.parse(categorized_html_string)
     """
     _containsObjects = True
+    _name_imdb_index = re.compile(r'\([IVXLCDM]+\)')
 
     _birth_attrs = [Attribute(key='birth date',
                               path='.//time[@itemprop="birthDate"]/@datetime'),
@@ -100,6 +101,10 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
                               path=".//text()",
                               postprocess=lambda x: analyze_name(x,
                                                                  canonical=1))),
+            Extractor(label='name_index',
+                      path="//h1[@class='header']/span[1]",
+                      attrs=Attribute(key='name_index',
+                                      path="./text()")),
 
             Extractor(label='birth info',
                       path="//div[h4='Born:']",
@@ -110,7 +115,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
                       attrs=_death_attrs),
 
             Extractor(label='headshot',
-                      path="//td[@id='img_primary']/a",
+                      path="//td[@id='img_primary']/div[@class='image']/a",
                       attrs=Attribute(key='headshot',
                                       path="./img/@src")),
 
@@ -152,6 +157,11 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
         for what in 'birth date', 'death date':
             if what in data and not data[what]:
                 del data[what]
+        name_index = (data.get('name_index') or '').strip()
+        if name_index:
+            if self._name_imdb_index.match(name_index):
+                data['imdbIndex'] = name_index[1:-1]
+            del data['name_index']
         # XXX: the code below is for backwards compatibility
         # probably could be removed
         for key in data.keys():
@@ -220,13 +230,13 @@ class DOMHTMLBioParser(DOMParserBase):
                       attrs=Attribute(key='headshot',
                                       path="./img/@src")),
             Extractor(label='birth info',
-                      path="//div[h5='Date of Birth']",
+                      path="//table[@id='overviewTable']//td[text()='Date of Birth']/following-sibling::td[1]",
                       attrs=_birth_attrs),
             Extractor(label='death info',
-                      path="//div[h5='Date of Death']",
+                      path="//table[@id='overviewTable']//td[text()='Date of Death']/following-sibling::td[1]",
                       attrs=_death_attrs),
             Extractor(label='nick names',
-                      path="//div[h5='Nickname']",
+                      path="//table[@id='overviewTable']//td[text()='Nickenames']/following-sibling::td[1]",
                       attrs=Attribute(key='nick names',
                                       path="./text()",
                                       joiner='|',
@@ -234,25 +244,25 @@ class DOMHTMLBioParser(DOMParserBase):
                                           '::(', 1) for n in x.split('|')
                                           if n.strip()])),
             Extractor(label='birth name',
-                      path="//div[h5='Birth Name']",
+                      path="//table[@id='overviewTable']//td[text()='Birth Name']/following-sibling::td[1]",
                       attrs=Attribute(key='birth name',
                                       path="./text()",
                                       postprocess=lambda x: canonicalName(x.strip()))),
             Extractor(label='height',
-                      path="//div[h5='Height']",
+                      path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
                       attrs=Attribute(key='height',
                                       path="./text()",
                                       postprocess=lambda x: x.strip())),
             Extractor(label='mini biography',
-                      path="//div[h5='Mini Biography']",
+                      path="//a[@name='mini_bio']/following-sibling::div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
                       attrs=Attribute(key='mini biography',
                                       multi=True,
                                       path={
-                                          'bio': "./p//text()",
-                                          'by': "./b/following-sibling::a/text()"
+                                          'bio': ".//text()",
+                                          'by': ".//a[@name='ba']//text()"
                                       },
                                       postprocess=lambda x: "%s::%s" % \
-                                          (x.get('bio').strip(),
+                                          ((x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
                                            (x.get('by') or u'').strip() or u'Anonymous'))),
             Extractor(label='spouse',
                       path="//div[h5='Spouse']/table/tr",
@@ -5,9 +5,9 @@ This module provides the HTMLSearchCharacterParser class (and the
 search_character_parser instance), used to parse the results of a search
 for a given character.
 E.g., when searching for the name "Jesse James", the parsed page would be:
-    http://akas.imdb.com/find?s=Characters;mx=20;q=Jesse+James
+    http://akas.imdb.com/find?s=ch;mx=20;q=Jesse+James
 
-Copyright 2007-2009 Davide Alberani <da@erlug.linux.it>
+Copyright 2007-2012 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@ class DOMBasicCharacterParser(DOMBasicMovieParser):
 
 class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
     _BaseParser = DOMBasicCharacterParser
-    _notDirectHitTitle = '<title>imdb search'
+    _notDirectHitTitle = '<title>find - imdb'
     _titleBuilder = lambda self, x: build_name(x, canonical=False)
     _linkPrefix = '/character/ch'
 
@@ -57,7 +57,7 @@ class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
                                          {'name': x.get('name')}
                               ))]
     extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, " \
+                            path="//td[@class='result_text']/a[starts-with(@href, " \
                                     "'/character/ch')]/..",
                             attrs=_attrs)]
 
@@ -7,7 +7,7 @@ for a given company.
 E.g., when searching for the name "Columbia Pictures", the parsed page would be:
     http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
 
-Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
+Copyright 2008-2012 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@ class DOMBasicCompanyParser(DOMBasicMovieParser):
 
 class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
     _BaseParser = DOMBasicCompanyParser
-    _notDirectHitTitle = '<title>imdb company'
+    _notDirectHitTitle = '<title>find - imdb'
     _titleBuilder = lambda self, x: build_company_name(x)
     _linkPrefix = '/company/co'
 
@@ -59,7 +59,7 @@ class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
                                  or u''), stripNotes=True)
                               ))]
     extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, " \
+                            path="//td[@class='result_text']/a[starts-with(@href, " \
                                     "'/company/co')]/..",
                             attrs=_attrs)]
 
@@ -8,7 +8,7 @@ E.g., for when searching for the title "the passion", the parsed
 page would be:
     http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
 
-Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -77,7 +77,7 @@ class DOMBasicMovieParser(DOMParserBase):
 def custom_analyze_title(title):
     """Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
     # XXX: very crappy. :-(
-    nt = title.split(' ')[0]
+    nt = title.split(' aka ')[0]
     if nt:
         title = nt
     if not title:
@@ -92,7 +92,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
     "new search system" is used, for movies."""
 
     _BaseParser = DOMBasicMovieParser
-    _notDirectHitTitle = '<title>imdb title'
+    _notDirectHitTitle = '<title>find - imdb</title>'
     _titleBuilder = lambda self, x: build_title(x)
     _linkPrefix = '/title/tt'
 
@@ -101,8 +101,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
                     path={
                         'link': "./a[1]/@href",
                         'info': ".//text()",
-                        #'akas': ".//div[@class='_imdbpyAKA']//text()"
-                        'akas': ".//p[@class='find-aka']//text()"
+                        'akas': "./i//text()"
                     },
                     postprocess=lambda x: (
                         analyze_imdbid(x.get('link') or u''),
@@ -110,7 +109,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
                         x.get('akas')
                     ))]
     extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, '/title/tt')]/..",
+                            path="//td[@class='result_text']",
                             attrs=_attrs)]
     def _init(self):
         self.url = u''
@@ -119,14 +118,11 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
         self.url = u''
 
     def preprocess_string(self, html_string):
-        if self._notDirectHitTitle in html_string[:1024].lower():
+        if self._notDirectHitTitle in html_string[:10240].lower():
             if self._linkPrefix == '/title/tt':
                 # Only for movies.
+                # XXX (HTU): does this still apply?
                 html_string = html_string.replace('(TV mini-series)', '(mini)')
-                html_string = html_string.replace('<p class="find-aka">',
-                                                  '<p class="find-aka">::')
-                #html_string = _reAKAStitles.sub(
-                #    r'<div class="_imdbpyAKA">\1::</div>\2', html_string)
             return html_string
         # Direct hit!
         dbme = self._BaseParser(useModule=self._useModule)
@@ -141,7 +137,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
         title = self._titleBuilder(res[0][1])
         if not (link and title): return u''
         link = link.replace('http://pro.imdb.com', '')
-        new_html = '<td></td><td></td><td><a href="%s">%s</a></td>' % (link,
+        new_html = '<td class="result_text"><a href="%s">%s</a></td>' % (link,
                                                                        title)
         return new_html
 
@@ -161,11 +157,14 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
             if not datum[0] and datum[1]:
                 continue
             if datum[2] is not None:
-                akas = filter(None, datum[2].split('::'))
+                #akas = filter(None, datum[2].split('::'))
                 if self._linkPrefix == '/title/tt':
-                    akas = [a.replace('" - ', '::').rstrip() for a in akas]
-                    akas = [a.replace('aka "', '', 1).replace('aka "',
-                        '', 1).lstrip() for a in akas]
+                    # XXX (HTU): couldn't find a result with multiple akas
+                    aka = datum[2]
+                    akas = [aka[1:-1]]  # remove the quotes
+                    #akas = [a.replace('" - ', '::').rstrip() for a in akas]
+                    #akas = [a.replace('aka "', '', 1).replace('aka "',
+                    #    '', 1).lstrip() for a in akas]
                 datum[1]['akas'] = akas
                 data['data'][idx] = (datum[0], datum[1])
             else:
@@ -7,7 +7,7 @@ for a given person.
 E.g., when searching for the name "Mel Gibson", the parsed page would be:
     http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
 
-Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -55,7 +55,7 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
     """Parse the html page that the IMDb web server shows when the
     "new search system" is used, for persons."""
     _BaseParser = DOMBasicPersonParser
-    _notDirectHitTitle = '<title>imdb name'
+    _notDirectHitTitle = '<title>find - imdb'
    _titleBuilder = lambda self, x: build_name(x, canonical=True)
    _linkPrefix = '/name/nm'
 
@@ -74,11 +74,11 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
                                canonical=1), x.get('akas')
                         ))]
     extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, '/name/nm')]/..",
+                            path="//td[@class='result_text']/a[starts-with(@href, '/name/nm')]/..",
                             attrs=_attrs)]
 
     def preprocess_string(self, html_string):
-        if self._notDirectHitTitle in html_string[:1024].lower():
+        if self._notDirectHitTitle in html_string[:10240].lower():
             html_string = _reAKASp.sub(
                             r'\1<div class="_imdbpyAKA">\2::</div>\3',
                             html_string)
@@ -340,7 +340,7 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
         title = title[:nidx].rstrip()
     if year:
         year = year.strip()
-        if title[-1] == ')':
+        if title[-1:] == ')':
             fpIdx = title.rfind('(')
             if fpIdx != -1:
                 if notes: notes = '%s %s' % (title[fpIdx:], notes)
@@ -6,7 +6,7 @@ IMDb's data for mobile systems.
 the imdb.IMDb function will return an instance of this class when
 called with the 'accessSystem' argument set to "mobile".
 
-Copyright 2005-2011 Davide Alberani <da@erlug.linux.it>
+Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -193,7 +193,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                                     title)
             return res
         tl = title[0].lower()
-        if not tl.startswith('imdb title'):
+        if not tl.startswith('find - imdb'):
             # a direct hit!
             title = _unHtml(title[0])
             mid = None
@@ -211,7 +211,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
             # XXX: this results*3 prevents some recursion errors, but...
             # it's not exactly understandable (i.e.: why 'results' is
             # not enough to get all the results?)
-            lis = _findBetween(cont, 'td valign="top">', '</td>',
+            lis = _findBetween(cont, 'td class="result_text">', '</td>',
                                maxRes=results*3)
             for li in lis:
                 akas = re_makas.findall(li)
@@ -492,7 +492,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
             self._mobile_logger.warn('no title tag searching for name %s', name)
             return res
         nl = name[0].lower()
-        if not nl.startswith('imdb name'):
+        if not nl.startswith('find - imdb'):
             # a direct hit!
             name = _unHtml(name[0])
             name = name.replace('- Filmography by type' , '').strip()
@@ -506,7 +506,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                 return res
             res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
         else:
-            lis = _findBetween(cont, 'td valign="top">', '</td>',
+            lis = _findBetween(cont, 'td class="result_text">', '</td>',
                                maxRes=results*3)
             for li in lis:
                 akas = _findBetween(li, '<em>"', '"</em>')
@@ -771,7 +771,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
         return {'data': d}
 
     def _search_character(self, name, results):
-        cont = subXMLRefs(self._get_search_content('char', name, results))
+        cont = subXMLRefs(self._get_search_content('ch', name, results))
         name = _findBetween(cont, '<title>', '</title>', maxRes=1)
         res = []
         if not name:
@@ -779,8 +779,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                                      name)
             return res
         nl = name[0].lower()
-        if not (nl.startswith('imdb search') or nl.startswith('imdb search') \
-                or nl.startswith('imdb character')):
+        if not nl.startswith('find - imdb'):
             # a direct hit!
             name = _unHtml(name[0]).replace('(Character)', '').strip()
             pid = None
@@ -793,12 +792,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                 return res
             res[:] = [(str(pid[0]), analyze_name(name))]
         else:
-            sects = _findBetween(cont, '<b>Popular Characters</b>', '</table>',
-                                 maxRes=results*3)
-            sects += _findBetween(cont, '<b>Characters', '</table>',
-                                  maxRes=results*3)
-            for sect in sects:
-                lis = _findBetween(sect, '<a href="/character/',
+            lis = _findBetween(cont, '<td class="result_text"',
                                ['<small', '</td>', '<br'])
             for li in lis:
                 li = '<%s' % li
@@ -7,7 +7,7 @@ the SQLObject _AND_ SQLAlchemy Object Relational Managers is available.
 the imdb.IMDb function will return an instance of this class when
 called with the 'accessSystem' argument set to "sql", "database" or "db".
 
-Copyright 2005-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -452,7 +452,12 @@ def get_movie_data(movieID, kindDict, fromAka=0, _table=None):
     else:
         if not fromAka: Table = Title
         else: Table = AkaTitle
-    m = Table.get(movieID)
+    try:
+        m = Table.get(movieID)
+    except Exception, e:
+        _aux_logger.warn('Unable to fetch information for movieID %s: %s', movieID, e)
+        mdict = {}
+        return mdict
     mdict = {'title': m.title, 'kind': kindDict[m.kindID],
             'year': m.productionYear, 'imdbIndex': m.imdbIndex,
             'season': m.seasonNr, 'episode': m.episodeNr}
@@ -825,14 +830,14 @@ class IMDbSqlAccessSystem(IMDbBase):
         imdbID = movie.imdbID
         if imdbID is not None: return '%07d' % imdbID
         m_dict = get_movie_data(movie.id, self._kind)
-        titline = build_title(m_dict, ptdf=1)
-        imdbID = self.title2imdbID(titline)
+        titline = build_title(m_dict, ptdf=0)
+        imdbID = self.title2imdbID(titline, m_dict['kind'])
         # If the imdbID was retrieved from the web and was not in the
         # database, update the database (ignoring errors, because it's
         # possibile that the current user has not update privileges).
         # There're times when I think I'm a genius; this one of
         # those times... <g>
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
             try: movie.imdbID = int(imdbID)
             except: pass
         return imdbID
@@ -847,9 +852,9 @@ class IMDbSqlAccessSystem(IMDbBase):
         imdbID = person.imdbID
         if imdbID is not None: return '%07d' % imdbID
         n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
-        namline = build_name(n_dict, canonical=1)
+        namline = build_name(n_dict, canonical=False)
         imdbID = self.name2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
             try: person.imdbID = int(imdbID)
             except: pass
         return imdbID
@@ -864,9 +869,9 @@ class IMDbSqlAccessSystem(IMDbBase):
         imdbID = character.imdbID
         if imdbID is not None: return '%07d' % imdbID
         n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
-        namline = build_name(n_dict, canonical=1)
+        namline = build_name(n_dict, canonical=False)
         imdbID = self.character2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
             try: character.imdbID = int(imdbID)
             except: pass
         return imdbID
@@ -883,7 +888,7 @@ class IMDbSqlAccessSystem(IMDbBase):
         n_dict = {'name': company.name, 'country': company.countryCode}
         namline = build_company_name(n_dict)
         imdbID = self.company2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
             try: company.imdbID = int(imdbID)
             except: pass
         return imdbID
@@ -1116,6 +1121,7 @@ class IMDbSqlAccessSystem(IMDbBase):
         if mlinks:
             for ml in mlinks:
                 lmovieData = get_movie_data(ml[0], self._kind)
+                if lmovieData:
                     m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
                     ml[0] = m
         res['connections'] = {}
@@ -466,6 +466,7 @@ class _AlchemyConnection(object):
 
 def setConnection(uri, tables, encoding='utf8', debug=False):
     """Set connection for every table."""
+    params = {'encoding': encoding}
     # FIXME: why on earth MySQL requires an additional parameter,
     # is well beyond my understanding...
     if uri.startswith('mysql'):
@@ -474,7 +475,11 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
         else:
             uri += '?'
         uri += 'charset=%s' % encoding
-    params = {'encoding': encoding}
+
+    # On some server configurations, we will need to explictly enable
+    # loading data from local files
+    params['local_infile'] = 1
+
     if debug:
         params['echo'] = True
     if uri.startswith('ibm_db'):
Binary file not shown.
@@ -182,6 +182,10 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
         kw['use_unicode'] = 1
         #kw['sqlobject_encoding'] = encoding
         kw['charset'] = encoding
+
+        # On some server configurations, we will need to explictly enable
+        # loading data from local files
+        kw['local_infile'] = 1
     conn = connectionForURI(uri, **kw)
     conn.debug = debug
     # XXX: doesn't work and a work-around was put in imdbpy2sql.py;
@@ -3,7 +3,7 @@ utils module (imdb package).
 
 This module provides basic utilities for the imdb package.
 
-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2009 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -189,10 +189,9 @@ _unicodeArticles = linguistics.toUnicode(_articles)
 articlesDicts = linguistics.articlesDictsForLang(None)
 spArticles = linguistics.spArticlesForLang(None)
 
-def canonicalTitle(title, lang=None):
+def canonicalTitle(title, lang=None, imdbIndex=None):
     """Return the title in the canonic format 'Movie Title, The';
-    beware that it doesn't handle long imdb titles, but only the
-    title portion, without year[/imdbIndex] or special markup.
+    beware that it doesn't handle long imdb titles.
     The 'lang' argument can be used to specify the language of the title.
     """
     isUnicode = isinstance(title, unicode)
@@ -203,15 +202,19 @@ def canonicalTitle(title, lang=None):
     except IndexError:
         pass
     if isUnicode:
-        _format = u'%s, %s'
+        _format = u'%s%s, %s'
     else:
-        _format = '%s, %s'
+        _format = '%s%s, %s'
     ltitle = title.lower()
+    if imdbIndex:
+        imdbIndex = ' (%s)' % imdbIndex
+    else:
+        imdbIndex = ''
     spArticles = linguistics.spArticlesForLang(lang)
     for article in spArticles[isUnicode]:
         if ltitle.startswith(article):
             lart = len(article)
-            title = _format % (title[lart:], title[:lart])
+            title = _format % (title[lart:], imdbIndex, title[:lart])
             if article[-1] == ' ':
                 title = title[:-1]
             break
@@ -383,18 +386,42 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
     if title.endswith('(TV)'):
         kind = u'tv movie'
         title = title[:-4].rstrip()
+    elif title.endswith('(TV Movie)'):
+        kind = u'tv movie'
+        title = title[:-10].rstrip()
     elif title.endswith('(V)'):
         kind = u'video movie'
         title = title[:-3].rstrip()
-    elif title.endswith('(video)'):
+    elif title.lower().endswith('(video)'):
         kind = u'video movie'
         title = title[:-7].rstrip()
+    elif title.endswith('(TV Short)'):
+        kind = u'tv short'
+        title = title[:-10].rstrip()
+    elif title.endswith('(TV Mini-Series)'):
+        kind = u'tv mini series'
+        title = title[:-16].rstrip()
     elif title.endswith('(mini)'):
         kind = u'tv mini series'
         title = title[:-6].rstrip()
     elif title.endswith('(VG)'):
         kind = u'video game'
         title = title[:-4].rstrip()
+    elif title.endswith('(Video Game)'):
+        kind = u'video game'
+        title = title[:-12].rstrip()
+    elif title.endswith('(TV Series)'):
+        epindex = title.find('(TV Episode) - ')
+        if epindex >= 0:
+            # It's an episode of a series.
+            kind = u'episode'
+            series_info = analyze_title(title[epindex + 15:])
+            result['episode of'] = series_info.get('title')
+            result['series year'] = series_info.get('year')
+            title = title[:epindex]
+        else:
+            kind = u'tv series'
+            title = title[:-11].rstrip()
     # Search for the year and the optional imdbIndex (a roman number).
     yi = re_year_index.findall(title)
     if not yi:
@@ -430,9 +457,6 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
         if not kind:
             kind = u'tv series'
         title = title[1:-1].strip()
-    elif title.endswith('(TV series)'):
-        kind = u'tv series'
-        title = title[:-11].rstrip()
     if not title:
         raise IMDbParserError('invalid title: "%s"' % original_t)
     if canonical is not None:
@@ -489,7 +513,7 @@ def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''):
 
 def build_title(title_dict, canonical=None, canonicalSeries=None,
                 canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
-                _emptyString=u''):
+                _emptyString=u'', appendKind=True):
     """Given a dictionary that represents a "long" IMDb title,
     return a string.
 
@@ -511,6 +535,11 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
             doYear = 0
         if ptdf:
             doYear = 1
+        # XXX: for results coming from the new search page.
+        if not isinstance(episode_of, (dict, _Container)):
+            episode_of = {'title': episode_of, 'kind': 'tv series'}
+        if 'series year' in title_dict:
+            episode_of['year'] = title_dict['series year']
         pre_title = build_title(episode_of, canonical=canonicalSeries,
                                 ptdf=0, _doYear=doYear,
                                 _emptyString=_emptyString)
@@ -545,12 +574,14 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
             episode_title += '.%s' % episode
         episode_title += ')'
         episode_title = '{%s}' % episode_title
-        return '%s %s' % (pre_title, episode_title)
+        return _emptyString + '%s %s' % (_emptyString + pre_title,
+                                         _emptyString + episode_title)
     title = title_dict.get('title', '')
+    imdbIndex = title_dict.get('imdbIndex', '')
     if not title: return _emptyString
     if canonical is not None:
         if canonical:
-            title = canonicalTitle(title, lang=lang)
+            title = canonicalTitle(title, lang=lang, imdbIndex=imdbIndex)
         else:
             title = normalizeTitle(title, lang=lang)
     if pre_title:
@@ -558,15 +589,20 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
     if kind in (u'tv series', u'tv mini series'):
         title = '"%s"' % title
     if _doYear:
-        imdbIndex = title_dict.get('imdbIndex')
-        year = title_dict.get('year') or u'????'
+        year = title_dict.get('year') or '????'
         if isinstance(_emptyString, str):
             year = str(year)
+        imdbIndex = title_dict.get('imdbIndex')
+        if not ptdf:
+            if imdbIndex and (canonical is None or canonical):
+                title += ' (%s)' % imdbIndex
+            title += ' (%s)' % year
+        else:
             title += ' (%s' % year
-            if imdbIndex:
+            if imdbIndex and (canonical is None or canonical):
                 title += '/%s' % imdbIndex
             title += ')'
-    if kind:
+    if appendKind and kind:
         if kind == 'tv movie':
             title += ' (TV)'
         elif kind == 'video movie':
@@ -11,6 +11,7 @@ __author__ = "dbr/Ben"
 __version__ = "1.9"
 
 import os
+import re
 import time
 import getpass
 import StringIO
@@ -18,8 +19,10 @@ import tempfile
 import warnings
 import logging
 import zipfile
+import datetime as dt
 import requests
 import cachecontrol
+import xmltodict
 
 try:
     import xml.etree.cElementTree as ElementTree
@@ -31,6 +34,7 @@ try:
 except ImportError:
     gzip = None
 
+from lib.dateutil.parser import parse
 from cachecontrol import caches
 
 from tvdb_ui import BaseUI, ConsoleUI
@@ -560,35 +564,60 @@ class Tvdb:
         except requests.Timeout, e:
             raise tvdb_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
 
-        if 'application/zip' in resp.headers.get("Content-Type", '') and resp.ok:
+        def process(path, key, value):
+            key = key.lower()
+
+            # clean up value and do type changes
+            if value:
+                try:
+                    # convert to integer if needed
+                    if value.isdigit():
+                        value = int(value)
+                except:
+                    pass
+
+                if key in ['banner', 'fanart', 'poster']:
+                    value = self.config['url_artworkPrefix'] % (value)
+                else:
+                    value = self._cleanData(value)
+
+                try:
+                    if key == 'firstaired' and value in "0000-00-00":
+                        new_value = str(dt.date.fromordinal(1))
+                        new_value = re.sub("([-]0{2}){1,}", "", new_value)
+                        fixDate = parse(new_value, fuzzy=True).date()
+                        value = fixDate.strftime("%Y-%m-%d")
+                    elif key == 'firstaired':
+                        value = parse(value, fuzzy=True).date()
+                        value = value.strftime("%Y-%m-%d")
+                except:
+                    pass
+
+                value = self._cleanData(value)
+            return (key, value)
+
+        if resp.ok:
+            if 'application/zip' in resp.headers.get("Content-Type", ''):
                 try:
                     # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
                     log().debug("We recived a zip file unpacking now ...")
                     zipdata = StringIO.StringIO()
                     zipdata.write(resp.content)
                     myzipfile = zipfile.ZipFile(zipdata)
-                    return myzipfile.read('%s.xml' % language)
+                    return xmltodict.parse(myzipfile.read('%s.xml' % language), postprocessor=process)
                 except zipfile.BadZipfile:
                     raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
 
-        return resp.content if resp.ok else None
+            else:
+                return xmltodict.parse(resp.text.strip(), postprocessor=process)
 
     def _getetsrc(self, url, params=None, language=None):
         """Loads a URL using caching, returns an ElementTree of the source
         """
         src = self._loadUrl(url, params=params, language=language)
         try:
-            # TVDB doesn't sanitize \r (CR) from user input in some fields,
-            # remove it to avoid errors. Change from SickBeard, from will14m
-            return ElementTree.fromstring(src.rstrip("\r")) if src else None
-        except SyntaxError:
-            src = self._loadUrl(url, params=params, language=language)
-            try:
-                return ElementTree.fromstring(src.rstrip("\r")) if src else None
-            except SyntaxError, exceptionmsg:
-                errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % (
-                    exceptionmsg
-                )
+            src = [src[item] for item in src][0]
+        except:
+            errormsg = "There was an error with the XML retrieved from thetvdb.com:"
 
             if self.config['cache_enabled']:
                 errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
@ -599,6 +628,8 @@ class Tvdb:
|
|||
errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
|
||||
raise tvdb_error(errormsg)
|
||||
|
||||
return src
|
||||
|
||||
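With _loadUrl now returning a parsed dict instead of raw XML, the `[src[item] for item in src][0]` idiom above simply unwraps the single root element. An illustrative sketch (sample data, not actual API output):

    src = {'data': {'series': {'id': 71663, 'seriesname': 'The Simpsons'}}}
    src = [src[item] for item in src][0]
    # src is now {'series': {'id': 71663, 'seriesname': 'The Simpsons'}}
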
    def _setItem(self, sid, seas, ep, attrib, value):
        """Creates a new episode, creating Show(), Season() and
        Episode()s as required. Called by _getShowData to populate show

@ -649,9 +680,8 @@ class Tvdb:
        log().debug("Searching for show %s" % series)
        self.config['params_getSeries']['seriesname'] = series
        seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
        allSeries = list(dict((s.tag.lower(), s.text) for s in x.getchildren()) for x in seriesEt)

        return allSeries
        return [seriesEt[item] for item in seriesEt][0]

    def _getSeries(self, series):
        """This searches TheTVDB.com for the series name,

@ -798,24 +828,13 @@ class Tvdb:
            self.config['url_seriesInfo'] % (sid, getShowInLanguage)
        )

        if seriesInfoEt is None: return False
        for curInfo in seriesInfoEt.findall("Series")[0]:
            tag = curInfo.tag.lower()
            value = curInfo.text

            if tag == 'seriesname' and value is None:
        # check and make sure we have data to process and that it contains a series name
        if seriesInfoEt is None or 'seriesname' not in seriesInfoEt['series']:
                return False

            if value is not None:
                if tag == 'id':
                    value = int(value)
        for k, v in seriesInfoEt['series'].items():
            self._setShowData(sid, k, v)

                if tag in ['banner', 'fanart', 'poster']:
                    value = self.config['url_artworkPrefix'] % (value)
                else:
                    value = self._cleanData(value)

                self._setShowData(sid, tag, value)
        if seriesSearch:
            return True

@ -837,63 +856,40 @@ class Tvdb:

        epsEt = self._getetsrc(url, language=language)

        for cur_ep in epsEt.findall("Episode"):

        for cur_ep in epsEt["episode"]:
            if self.config['dvdorder']:
                log().debug('Using DVD ordering.')
                use_dvd = cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None
                use_dvd = cur_ep['dvd_season'] != None and cur_ep['dvd_episodenumber'] != None
            else:
                use_dvd = False

            if use_dvd:
                elem_seasnum, elem_epno = cur_ep.find('DVD_season'), cur_ep.find('DVD_episodenumber')
                seasnum, epno = cur_ep['dvd_season'], cur_ep['dvd_episodenumber']
            else:
                elem_seasnum, elem_epno = cur_ep.find('SeasonNumber'), cur_ep.find('EpisodeNumber')

            if elem_seasnum is None or elem_epno is None:
                seasnum, epno = cur_ep['seasonnumber'], cur_ep['episodenumber']

            if seasnum is None or epno is None:
                log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
                    elem_seasnum, elem_epno))
                log().debug(
                    " ".join(
                        "%r is %r" % (child.tag, child.text) for child in cur_ep.getchildren()))
                # TODO: Should this happen?
                    seasnum, epno))
                continue # Skip to next episode


            # float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
            seas_no = int(float(elem_seasnum.text))
            ep_no = int(float(elem_epno.text))
            seas_no = int(float(seasnum))
            ep_no = int(float(epno))

            useDVD = False
            for k,v in cur_ep.items():
                k = k.lower()

            if (self.config['dvdorder']):
                log().debug('DVD Order? Yes')
                useDVD = (cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None)
                if v is not None:
                    if k == 'id':
                        v = int(v)

                    if k == 'filename':
                        v = self.config['url_artworkPrefix'] % (v)
                    else:
                log().debug('DVD Order? No')
                        v = self._cleanData(v)

            if (useDVD):
                log().debug('Use DVD Order? Yes')
                seas_no = int(cur_ep.find('DVD_season').text)
                ep_no = int(float(cur_ep.find('DVD_episodenumber').text))
            else:
                log().debug('Use DVD Order? No')
                seas_no = int(cur_ep.find('SeasonNumber').text)
                ep_no = int(cur_ep.find('EpisodeNumber').text)

            for cur_item in cur_ep.getchildren():
                tag = cur_item.tag.lower()
                value = cur_item.text
                if value is not None:
                    if tag == 'id':
                        value = int(value)

                    if tag == 'filename':
                        value = self.config['url_artworkPrefix'] % (value)
                    else:
                        value = self._cleanData(value)
                    self._setItem(sid, seas_no, ep_no, tag, value)
                    self._setItem(sid, seas_no, ep_no, k, v)

        return True

@ -24,6 +24,7 @@ import logging
import datetime as dt
import requests
import cachecontrol
import xmltodict

try:
    import xml.etree.cElementTree as ElementTree

@ -37,9 +38,11 @@ from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
                               tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)


def log():
    return logging.getLogger("tvrage_api")


def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

@ -83,6 +86,7 @@ def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):

    return deco_retry

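The decorator body is elided by the diff view; a minimal sketch of the usual exponential-backoff pattern behind this signature (an assumption, the committed implementation may differ in detail):

    import time
    from functools import wraps

    def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
        def deco_retry(f):
            @wraps(f)
            def f_retry(*args, **kwargs):
                mtries, mdelay = tries, delay
                while mtries > 1:
                    try:
                        return f(*args, **kwargs)
                    except ExceptionToCheck, e:
                        if logger:
                            logger.warning("%s, retrying in %d seconds..." % (str(e), mdelay))
                        time.sleep(mdelay)   # wait, then double (backoff=2) the delay
                        mtries -= 1
                        mdelay *= backoff
                return f(*args, **kwargs)
            return f_retry
        return deco_retry
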
class ShowContainer(dict):
    """Simple dict that holds a series of Show instances
    """

@ -112,6 +116,7 @@ class ShowContainer(dict):
class Show(dict):
    """Holds a dict of seasons, and show data.
    """

    def __init__(self):
        dict.__init__(self)
        self.data = {}

@ -261,8 +266,10 @@ class Episode(dict):
            if cur_value.find(unicode(term).lower()) > -1:
                return self


class TVRage:
    """Create easy-to-use interface to name of season/episode name"""

    def __init__(self,
                 interactive=False,
                 select_first=False,

@ -390,9 +397,9 @@ class TVRage:

            # get response from TVRage
            if self.config['cache_enabled']:
                resp = self.sess.get(url, cache_auto=True, params=params)
                resp = self.sess.get(url.strip(), cache_auto=True, params=params)
            else:
                resp = requests.get(url, params=params)
                resp = requests.get(url.strip(), params=params)

        except requests.HTTPError, e:
            raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))

@ -403,12 +410,8 @@ class TVRage:
        except requests.Timeout, e:
            raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))

        return resp.content if resp.ok else None

    def _getetsrc(self, url, params=None):
        """Loads a URL using caching, returns an ElementTree of the source
        """
        reDict = {
        def remap_keys(path, key, value):
            name_map = {
                'showid': 'id',
                'showname': 'seriesname',
                'name': 'seriesname',

@ -422,54 +425,59 @@ class TVRage:
                'title': 'episodename',
                'airdate': 'firstaired',
                'screencap': 'filename',
                'seasonnum': 'episodenumber',
                'seasonnum': 'episodenumber'
            }

        robj = re.compile('|'.join(reDict.keys()))
        src = self._loadUrl(url, params)
        try:
            # TVRAGE doesn't sanitize \r (CR) from user input in some fields,
            # remove it to avoid errors. Change from SickBeard, from will14m
            xml = ElementTree.fromstring(src.rstrip("\r"))
            tree = ElementTree.ElementTree(xml)
            for elm in tree.findall('.//*'):
                elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
                key = name_map[key.lower()]
            except (ValueError, TypeError, KeyError):
                key = key.lower()

            # clean up value and do type changes
            if value:
                if isinstance(value, dict):
                    if key == 'network':
                        value = value['#text']
                    if key == 'genre':
                        value = value['genre']
                        if not isinstance(value, list):
                            value = [value]
                        value = '|' + '|'.join(value) + '|'

                if elm.tag in 'firstaired':
                    try:
                        if elm.text in "0000-00-00":
                            elm.text = str(dt.date.fromordinal(1))
                            elm.text = re.sub("([-]0{2}){1,}", "", elm.text)
                            fixDate = parse(elm.text, fuzzy=True).date()
                            elm.text = fixDate.strftime("%Y-%m-%d")
                try:
                    # convert to integer if needed
                    if value.isdigit():
                        value = int(value)
                except:
                    pass
            return ElementTree.fromstring(ElementTree.tostring(xml))
        except SyntaxError:
            src = self._loadUrl(url, params)
            try:
                xml = ElementTree.fromstring(src.rstrip("\r"))
                tree = ElementTree.ElementTree(xml)
                for elm in tree.findall('.//*'):
                    elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)

                if elm.tag in 'firstaired' and elm.text:
                    if elm.text == "0000-00-00":
                        elm.text = str(dt.date.fromordinal(1))
                    try:
                        #month = strptime(match.group('air_month')[:3],'%b').tm_mon
                        #day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
                        #dtStr = '%s/%s/%s' % (year, month, day)

                        fixDate = parse(elm.text, fuzzy=True)
                        elm.text = fixDate.strftime("%Y-%m-%d")
                    if key == 'firstaired' and value in "0000-00-00":
                        new_value = str(dt.date.fromordinal(1))
                        new_value = re.sub("([-]0{2}){1,}", "", new_value)
                        fixDate = parse(new_value, fuzzy=True).date()
                        value = fixDate.strftime("%Y-%m-%d")
                    elif key == 'firstaired':
                        value = parse(value, fuzzy=True).date()
                        value = value.strftime("%Y-%m-%d")
                except:
                    pass
                return ElementTree.fromstring(ElementTree.tostring(xml))
            except SyntaxError, exceptionmsg:
                errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % (
                    exceptionmsg
                )

                value = self._cleanData(value)
            return (key, value)

        if resp.ok:
            return xmltodict.parse(resp.text.strip(), postprocessor=remap_keys)

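The genre branch above flattens the parsed `{'genre': [...]}` structure into the pipe-delimited string the rest of SickRage expects; step by step with sample values:

    value = {'genre': ['Action', 'Drama']}   # as parsed from <genres><genre>...</genre></genres>
    value = value['genre']
    if not isinstance(value, list):
        value = [value]
    value = '|' + '|'.join(value) + '|'
    # value is now '|Action|Drama|'
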
    def _getetsrc(self, url, params=None):
        """Loads a URL using caching, returns an ElementTree of the source
        """

        try:
            src = self._loadUrl(url, params)
            src = [src[item] for item in src][0]
        except:
            errormsg = "There was an error with the XML retrieved from tvrage.com"

            if self.config['cache_enabled']:
                errormsg += "\nFirst try emptying the cache folder at..\n%s" % (

@ -479,6 +487,8 @@ class TVRage:
        errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
        raise tvrage_error(errormsg)

        return src

    def _setItem(self, sid, seas, ep, attrib, value):
        """Creates a new episode, creating Show(), Season() and
        Episode()s as required. Called by _getShowData to populate show

@ -529,9 +539,8 @@ class TVRage:
        log().debug("Searching for show %s" % series)
        self.config['params_getSeries']['show'] = series
        seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
        allSeries = list(dict((s.tag.lower(),s.text) for s in x.getchildren()) for x in seriesEt)

        return allSeries
        return [seriesEt[item] for item in seriesEt][0]

    def _getSeries(self, series):
        """This searches tvrage.com for the series name,

@ -568,60 +577,47 @@ class TVRage:
            self.config['params_seriesInfo']
        )

        if seriesInfoEt is None: return False
        for curInfo in seriesInfoEt:
            tag = curInfo.tag.lower()
            value = curInfo.text

            if tag == 'seriesname' and value is None:
        # check and make sure we have data to process and that it contains a series name
        if seriesInfoEt is None or 'seriesname' not in seriesInfoEt:
            return False

            if tag == 'id':
                value = int(value)
        for k, v in seriesInfoEt.items():
            self._setShowData(sid, k, v)

            if value is not None:
                value = self._cleanData(value)

            self._setShowData(sid, tag, value)
        if seriesSearch: return True

        try:
            # Parse genre data
            log().debug('Getting genres of %s' % (sid))
            for genre in seriesInfoEt.find('genres'):
                tag = genre.tag.lower()

                value = genre.text
                if value is not None:
                    value = self._cleanData(value)

                self._setShowData(sid, tag, value)
        except Exception:
            log().debug('No genres for %s' % (sid))
        # series search ends here
        if seriesSearch:
            return True

        # Parse episode data
        log().debug('Getting all episodes of %s' % (sid))

        self.config['params_epInfo']['sid'] = sid
        epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
        for cur_list in epsEt.findall("Episodelist"):
            for cur_seas in cur_list:
                try:
                    seas_no = int(cur_seas.attrib['no'])
                    for cur_ep in cur_seas:
                        ep_no = int(cur_ep.find('episodenumber').text)

        for season in epsEt['Episodelist']['Season']:
            episodes = season['episode']
            if not isinstance(episodes, list):
                episodes = [episodes]

            for episode in episodes:
                seas_no = int(season['@no'])
                ep_no = int(episode['episodenumber'])
                self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
                for cur_item in cur_ep:
                    tag = cur_item.tag.lower()

                    value = cur_item.text
                    if value is not None:
                        if tag == 'id':
                            value = int(value)
                for k, v in episode.items():
                    try:
                        k = k.lower()
                        if v is not None:
                            if k == 'link':
                                v = v.rsplit('/', 1)[1]
                                k = 'id'

                        value = self._cleanData(value)
                            if k == 'id':
                                v = int(v)

                        self._setItem(sid, seas_no, ep_no, tag, value)
                            v = self._cleanData(v)

                            self._setItem(sid, seas_no, ep_no, k, v)
                    except:
                        continue
        return True
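
The isinstance guard above is needed because xmltodict maps repeated sibling elements to a list but a lone child to a single value; a quick illustration:

    import xmltodict

    one = xmltodict.parse('<Season no="1"><episode>1</episode></Season>')
    many = xmltodict.parse('<Season no="1"><episode>1</episode><episode>2</episode></Season>')
    # one['Season']['episode'] is a single value, many['Season']['episode'] is a
    # list, hence the isinstance(episodes, list) check before iterating
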
@ -673,11 +669,13 @@ def main():
    grabs an episode name interactively.
    """
    import logging

    logging.basicConfig(level=logging.DEBUG)

    tvrage_instance = TVRage(cache=False)
    print tvrage_instance['Lost']['seriesname']
    print tvrage_instance['Lost'][1][4]['episodename']


if __name__ == '__main__':
    main()

@ -0,0 +1,359 @@
#!/usr/bin/env python
"Makes working with XML feel like you are working with JSON"

from xml.parsers import expat
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
try: # pragma no cover
    from cStringIO import StringIO
except ImportError: # pragma no cover
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
try: # pragma no cover
    from collections import OrderedDict
except ImportError: # pragma no cover
    try:
        from ordereddict import OrderedDict
    except ImportError:
        OrderedDict = dict

try: # pragma no cover
    _basestring = basestring
except NameError: # pragma no cover
    _basestring = str
try: # pragma no cover
    _unicode = unicode
except NameError: # pragma no cover
    _unicode = str

__author__ = 'Martin Blech'
__version__ = '0.9.0'
__license__ = 'MIT'

class ParsingInterrupted(Exception):
    pass


class _DictSAXHandler(object):
    def __init__(self,
                 item_depth=0,
                 item_callback=lambda *args: True,
                 xml_attribs=True,
                 attr_prefix='@',
                 cdata_key='#text',
                 force_cdata=False,
                 cdata_separator='',
                 postprocessor=None,
                 dict_constructor=OrderedDict,
                 strip_whitespace=True,
                 namespace_separator=':',
                 namespaces=None):
        self.path = []
        self.stack = []
        self.data = None
        self.item = None
        self.item_depth = item_depth
        self.xml_attribs = xml_attribs
        self.item_callback = item_callback
        self.attr_prefix = attr_prefix
        self.cdata_key = cdata_key
        self.force_cdata = force_cdata
        self.cdata_separator = cdata_separator
        self.postprocessor = postprocessor
        self.dict_constructor = dict_constructor
        self.strip_whitespace = strip_whitespace
        self.namespace_separator = namespace_separator
        self.namespaces = namespaces

    def _build_name(self, full_name):
        if not self.namespaces:
            return full_name
        i = full_name.rfind(self.namespace_separator)
        if i == -1:
            return full_name
        namespace, name = full_name[:i], full_name[i+1:]
        short_namespace = self.namespaces.get(namespace, namespace)
        if not short_namespace:
            return name
        else:
            return self.namespace_separator.join((short_namespace, name))

    def _attrs_to_dict(self, attrs):
        if isinstance(attrs, dict):
            return attrs
        return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))

    def startElement(self, full_name, attrs):
        name = self._build_name(full_name)
        attrs = self._attrs_to_dict(attrs)
        self.path.append((name, attrs or None))
        if len(self.path) > self.item_depth:
            self.stack.append((self.item, self.data))
            if self.xml_attribs:
                attrs = self.dict_constructor(
                    (self.attr_prefix+key, value)
                    for (key, value) in attrs.items())
            else:
                attrs = None
            self.item = attrs or None
            self.data = None

    def endElement(self, full_name):
        name = self._build_name(full_name)
        if len(self.path) == self.item_depth:
            item = self.item
            if item is None:
                item = self.data
            should_continue = self.item_callback(self.path, item)
            if not should_continue:
                raise ParsingInterrupted()
        if len(self.stack):
            item, data = self.item, self.data
            self.item, self.data = self.stack.pop()
            if self.strip_whitespace and data is not None:
                data = data.strip() or None
            if data and self.force_cdata and item is None:
                item = self.dict_constructor()
            if item is not None:
                if data:
                    self.push_data(item, self.cdata_key, data)
                self.item = self.push_data(self.item, name, item)
            else:
                self.item = self.push_data(self.item, name, data)
        else:
            self.item = self.data = None
        self.path.pop()

    def characters(self, data):
        if not self.data:
            self.data = data
        else:
            self.data += self.cdata_separator + data

    def push_data(self, item, key, data):
        if self.postprocessor is not None:
            result = self.postprocessor(self.path, key, data)
            if result is None:
                return item
            key, data = result
        if item is None:
            item = self.dict_constructor()
        try:
            value = item[key]
            if isinstance(value, list):
                value.append(data)
            else:
                item[key] = [value, data]
        except KeyError:
            item[key] = data
        return item

def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', **kwargs):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can either be a `string` or a file-like object.

    If `xml_attribs` is `True`, element attributes are put in the dictionary
    among regular child elements, using `@` as a prefix to avoid collisions. If
    set to `False`, they are just ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    If `item_depth` is `0`, the function returns a dictionary for the root
    element (default behavior). Otherwise, it calls `item_callback` every time
    an item at the specified depth is found and returns `None` in the end
    (streaming mode).

    The callback function receives two parameters: the `path` from the document
    root to the item (name-attribs pairs), and the `item` (dict). If the
    callback's return value is false-ish, parsing will be stopped with the
    :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print 'path:%s item:%s' % (path, item)
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    The optional argument `postprocessor` is a function that takes `path`,
    `key` and `value` as positional arguments and returns a new `(key, value)`
    pair where both `key` and `value` may have changed. Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

    You can pass an alternate version of `expat` (such as `defusedexpat`) by
    using the `expat` parameter. E.g:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        OrderedDict([(u'a', u'hello')])

    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        if not encoding:
            encoding = 'utf-8'
        xml_input = xml_input.encode(encoding)
    if not process_namespaces:
        namespace_separator = None
    parser = expat.ParserCreate(
        encoding,
        namespace_separator
    )
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    parser.buffer_text = True
    try:
        parser.ParseFile(xml_input)
    except (TypeError, AttributeError):
        parser.Parse(xml_input, True)
    return handler.item


def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t'):
    if preprocessor is not None:
        result = preprocessor(key, value)
        if result is None:
            return
        key, value = result
    if not isinstance(value, (list, tuple)):
        value = [value]
    if depth == 0 and len(value) > 1:
        raise ValueError('document with multiple roots')
    for v in value:
        if v is None:
            v = OrderedDict()
        elif not isinstance(v, dict):
            v = _unicode(v)
        if isinstance(v, _basestring):
            v = OrderedDict(((cdata_key, v),))
        cdata = None
        attrs = OrderedDict()
        children = []
        for ik, iv in v.items():
            if ik == cdata_key:
                cdata = iv
                continue
            if ik.startswith(attr_prefix):
                attrs[ik[len(attr_prefix):]] = iv
                continue
            children.append((ik, iv))
        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attrs))
        if pretty and children:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in children:
            _emit(child_key, child_value, content_handler,
                  attr_prefix, cdata_key, depth+1, preprocessor,
                  pretty, newl, indent)
        if cdata is not None:
            content_handler.characters(cdata)
        if pretty and children:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)


def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
            **kwargs):
    """Emit an XML document for the given `input_dict` (reverse of `parse`).

    The resulting XML document is returned as a string, but if `output` (a
    file-like object) is specified, it is written there instead.

    Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
    as XML node attributes, whereas keys equal to `cdata_key`
    (default=`'#text'`) are treated as character data.

    The `pretty` parameter (default=`False`) enables pretty-printing. In this
    mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
    can be customized with the `newl` and `indent` parameters.

    """
    ((key, value),) = input_dict.items()
    must_return = False
    if output is None:
        output = StringIO()
        must_return = True
    content_handler = XMLGenerator(output, encoding)
    if full_document:
        content_handler.startDocument()
    _emit(key, value, content_handler, **kwargs)
    if full_document:
        content_handler.endDocument()
    if must_return:
        value = output.getvalue()
        try: # pragma no cover
            value = value.decode(encoding)
        except AttributeError: # pragma no cover
            pass
        return value
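
A usage sketch for `unparse`, the reverse of `parse`; the exact string shown assumes the default encoding and the emitter above:

    >>> import xmltodict
    >>> xmltodict.unparse({'a': {'@prop': 'x', '#text': 'hello'}})
    u'<?xml version="1.0" encoding="utf-8"?>\n<a prop="x">hello</a>'
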
if __name__ == '__main__': # pragma: no cover
    import sys
    import marshal

    (item_depth,) = sys.argv[1:]
    item_depth = int(item_depth)

    def handle_item(path, item):
        marshal.dump((path, item), sys.stdout)
        return True

    try:
        root = parse(sys.stdin,
                     item_depth=item_depth,
                     item_callback=handle_item,
                     dict_constructor=dict)
        if item_depth == 0:
            handle_item([], root)
    except KeyboardInterrupt:
        pass

@ -782,15 +782,11 @@ class GenericMetadata():

        # Try and get posters and fanart from TMDB
        if image_url is None:
        for show_name in set(allPossibleShowNames(show_obj)):
            if image_type in ('poster', 'poster_thumb'):
                image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
            elif image_type == 'fanart':
                image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)

            if image_url:
                break

        if image_url:
            image_data = metadata_helpers.getShowImage(image_url, which)
            return image_data

@ -965,8 +961,6 @@ class GenericMetadata():
        return (indexer_id, name, indexer)

    def _retrieve_show_images_from_tmdb(self, show, backdrop=False, poster=False):
        tmdb_id = None

        # get TMDB configuration info
        tmdb = TMDB(sickbeard.TMDB_API_KEY)
        config = tmdb.Configuration()

@ -981,27 +975,14 @@ class GenericMetadata():

        try:
            search = tmdb.Search()
            for result in search.collection({'query': show.name}) + search.tv({'query': show.name}):
                tmdb_id = result['id']
                external_ids = tmdb.TV(tmdb_id).external_ids()
                if show.indexerid in [external_ids['tvdb_id'], external_ids['tvrage_id']]:
                    break
            for show_name in set(allPossibleShowNames(show)):
                for result in search.collection({'query': show_name})['results'] + search.tv({'query': show_name})['results']:
                    if backdrop and result['backdrop_path']:
                        return "{0}{1}{2}".format(base_url, max_size, result['backdrop_path'])
                    elif poster and result['poster_path']:
                        return "{0}{1}{2}".format(base_url, max_size, result['poster_path'])

            if tmdb_id:
                images = tmdb.Collections(tmdb_id).images()
                if len(images) > 0:
                    # get backdrop urls
                    if backdrop:
                        rel_path = images['backdrops'][0]['file_path']
                        url = "{0}{1}{2}".format(base_url, max_size, rel_path)
                        return url

                    # get poster urls
                    if poster:
                        rel_path = images['posters'][0]['file_path']
                        url = "{0}{1}{2}".format(base_url, max_size, rel_path)
                        return url
        except:
        except Exception, e:
            pass

        logger.log(u"Could not find any posters or background for " + show.name, logger.DEBUG)
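
For context, the returned URL concatenates the TMDB image base URL from tmdb.Configuration(), a size, and the artwork's relative path; with illustrative (assumed) values:

    base_url = 'http://image.tmdb.org/t/p/'    # from config['images']['base_url'] (assumed value)
    max_size = 'original'                      # largest available size (assumed)
    backdrop_path = '/example_backdrop.jpg'    # hypothetical result['backdrop_path']
    url = "{0}{1}{2}".format(base_url, max_size, backdrop_path)
    # -> http://image.tmdb.org/t/p/original/example_backdrop.jpg
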
@ -829,7 +829,7 @@ class TVShow(object):
            self.airs = myEp["airs_dayofweek"] + " " + myEp["airs_time"]

        if getattr(myEp, 'firstaired', None) is not None:
            self.startyear = int(myEp["firstaired"].split('-')[0])
            self.startyear = int(str(myEp["firstaired"]).split('-')[0])

        self.status = getattr(myEp, 'status', '')

@ -855,7 +855,6 @@ class TVShow(object):
        i = imdb.IMDb()
        imdbTv = i.get_movie(str(re.sub("[^0-9]", "", self.imdbid)))

        test = imdbTv.keys()
        for key in filter(lambda x: x.replace('_', ' ') in imdbTv.keys(), imdb_info.keys()):
            # Store only the first value for string type
            if type(imdb_info[key]) == type('') and type(imdbTv.get(key)) == type([]):

@ -1556,7 +1555,7 @@ class TVEpisode(object):
                self.deleteEpisode()
            return False

        if myEp["absolute_number"] == None or myEp["absolute_number"] == "":
        if getattr(myEp, 'absolute_number', None) is None:
            logger.log(u"This episode (" + self.show.name + " - " + str(season) + "x" + str(
                episode) + ") has no absolute number on " + sickbeard.indexerApi(
                self.indexer).name

@ -1564,7 +1563,7 @@ class TVEpisode(object):
        else:
            logger.log(
                str(self.show.indexerid) + ": The absolute_number for " + str(season) + "x" + str(episode) + " is : " +
                myEp["absolute_number"], logger.DEBUG)
                str(myEp["absolute_number"]), logger.DEBUG)
            self.absolute_number = int(myEp["absolute_number"])

        self.name = getattr(myEp, 'episodename', "")

@ -1603,6 +1602,7 @@ class TVEpisode(object):
                u"The show dir is missing, not bothering to change the episode statuses since it'd probably be invalid")
            return

        if self.location:
            logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
                episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)