Update imdbpy libs to v5.0
Fixed invalid indexer ID issues for TVRage shows. Fixed issues with getting posters and backdrops for TVRage shows. We now convert XML straight to a dict object for the Indexer APIs, which improves the overall performance of the APIs. Fixed issues with TVRage shows not displaying genres properly.
This commit is contained in:
parent
764cf6e62e
commit
2dcd26e69c
|
@ -6,7 +6,7 @@ a person from the IMDb database.
|
||||||
It can fetch data through different media (e.g.: the IMDb web pages,
|
It can fetch data through different media (e.g.: the IMDb web pages,
|
||||||
a SQL database, etc.)
|
a SQL database, etc.)
|
||||||
|
|
||||||
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2014 Davide Alberani <da@erlug.linux.it>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
|
||||||
__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
|
__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
|
||||||
'available_access_systems']
|
'available_access_systems']
|
||||||
__version__ = VERSION = '4.9'
|
__version__ = VERSION = '5.0'
|
||||||
|
|
||||||
# Import compatibility module (importing it is enough).
|
# Import compatibility module (importing it is enough).
|
||||||
import _compat
|
import _compat
|
||||||
|
@ -160,6 +160,7 @@ def IMDb(accessSystem=None, *arguments, **keywords):
|
||||||
kwds.update(keywords)
|
kwds.update(keywords)
|
||||||
keywords = kwds
|
keywords = kwds
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
import logging
|
||||||
logging.getLogger('imdbpy').warn('Unable to read configuration' \
|
logging.getLogger('imdbpy').warn('Unable to read configuration' \
|
||||||
' file; complete error: %s' % e)
|
' file; complete error: %s' % e)
|
||||||
# It just LOOKS LIKE a bad habit: we tried to read config
|
# It just LOOKS LIKE a bad habit: we tried to read config
|
||||||
|
@ -303,7 +304,7 @@ class IMDbBase:
|
||||||
# http://akas.imdb.com/keyword/%s/
|
# http://akas.imdb.com/keyword/%s/
|
||||||
imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
|
imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
|
||||||
# http://akas.imdb.com/chart/top
|
# http://akas.imdb.com/chart/top
|
||||||
imdbURL_top250=imdbURL_base + 'chart/top',
|
imdbURL_top250=imdbURL_base + 'chart/top'
|
||||||
# http://akas.imdb.com/chart/bottom
|
# http://akas.imdb.com/chart/bottom
|
||||||
imdbURL_bottom100=imdbURL_base + 'chart/bottom'
|
imdbURL_bottom100=imdbURL_base + 'chart/bottom'
|
||||||
# http://akas.imdb.com/find?%s
|
# http://akas.imdb.com/find?%s
|
||||||
|
@ -824,22 +825,23 @@ class IMDbBase:
|
||||||
# subclass, somewhere under the imdb.parser package.
|
# subclass, somewhere under the imdb.parser package.
|
||||||
raise NotImplementedError('override this method')
|
raise NotImplementedError('override this method')
|
||||||
|
|
||||||
def _searchIMDb(self, kind, ton):
|
def _searchIMDb(self, kind, ton, title_kind=None):
|
||||||
"""Search the IMDb akas server for the given title or name."""
|
"""Search the IMDb akas server for the given title or name."""
|
||||||
# The Exact Primary search system has gone AWOL, so we resort
|
# The Exact Primary search system has gone AWOL, so we resort
|
||||||
# to the mobile search. :-/
|
# to the mobile search. :-/
|
||||||
if not ton:
|
if not ton:
|
||||||
return None
|
return None
|
||||||
|
ton = ton.strip('"')
|
||||||
aSystem = IMDb('mobile')
|
aSystem = IMDb('mobile')
|
||||||
if kind == 'tt':
|
if kind == 'tt':
|
||||||
searchFunct = aSystem.search_movie
|
searchFunct = aSystem.search_movie
|
||||||
check = 'long imdb canonical title'
|
check = 'long imdb title'
|
||||||
elif kind == 'nm':
|
elif kind == 'nm':
|
||||||
searchFunct = aSystem.search_person
|
searchFunct = aSystem.search_person
|
||||||
check = 'long imdb canonical name'
|
check = 'long imdb name'
|
||||||
elif kind == 'char':
|
elif kind == 'char':
|
||||||
searchFunct = aSystem.search_character
|
searchFunct = aSystem.search_character
|
||||||
check = 'long imdb canonical name'
|
check = 'long imdb name'
|
||||||
elif kind == 'co':
|
elif kind == 'co':
|
||||||
# XXX: are [COUNTRY] codes included in the results?
|
# XXX: are [COUNTRY] codes included in the results?
|
||||||
searchFunct = aSystem.search_company
|
searchFunct = aSystem.search_company
|
||||||
|
@ -852,24 +854,42 @@ class IMDbBase:
|
||||||
# exact match.
|
# exact match.
|
||||||
if len(searchRes) == 1:
|
if len(searchRes) == 1:
|
||||||
return searchRes[0].getID()
|
return searchRes[0].getID()
|
||||||
|
title_only_matches = []
|
||||||
for item in searchRes:
|
for item in searchRes:
|
||||||
# Return the first perfect match.
|
# Return the first perfect match.
|
||||||
if item[check] == ton:
|
if item[check].strip('"') == ton:
|
||||||
return item.getID()
|
# For titles do additional check for kind
|
||||||
|
if kind != 'tt' or title_kind == item['kind']:
|
||||||
|
return item.getID()
|
||||||
|
elif kind == 'tt':
|
||||||
|
title_only_matches.append(item.getID())
|
||||||
|
# imdbpy2sql.py could have detected the wrong type, so if no title and kind
|
||||||
|
# matches found - collect all results with title only match
|
||||||
|
# Return list of IDs if multiple matches (can happen when searching
|
||||||
|
# titles with no title_kind specified)
|
||||||
|
# Example: DB: Band of Brothers "tv series" vs "tv mini-series"
|
||||||
|
if title_only_matches:
|
||||||
|
if len(title_only_matches) == 1:
|
||||||
|
return title_only_matches[0]
|
||||||
|
else:
|
||||||
|
return title_only_matches
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def title2imdbID(self, title):
|
def title2imdbID(self, title, kind=None):
|
||||||
"""Translate a movie title (in the plain text data files format)
|
"""Translate a movie title (in the plain text data files format)
|
||||||
to an imdbID.
|
to an imdbID.
|
||||||
Try an Exact Primary Title search on IMDb;
|
Try an Exact Primary Title search on IMDb;
|
||||||
return None if it's unable to get the imdbID."""
|
return None if it's unable to get the imdbID;
|
||||||
return self._searchIMDb('tt', title)
|
Always specify kind: movie, tv series, video game etc. or search can
|
||||||
|
return list of IDs if multiple matches found
|
||||||
|
"""
|
||||||
|
return self._searchIMDb('tt', title, kind)
|
||||||
|
|
||||||
def name2imdbID(self, name):
|
def name2imdbID(self, name):
|
||||||
"""Translate a person name in an imdbID.
|
"""Translate a person name in an imdbID.
|
||||||
Try an Exact Primary Name search on IMDb;
|
Try an Exact Primary Name search on IMDb;
|
||||||
return None if it's unable to get the imdbID."""
|
return None if it's unable to get the imdbID."""
|
||||||
return self._searchIMDb('tt', name)
|
return self._searchIMDb('nm', name)
|
||||||
|
|
||||||
def character2imdbID(self, name):
|
def character2imdbID(self, name):
|
||||||
"""Translate a character name in an imdbID.
|
"""Translate a character name in an imdbID.
|
||||||
|
@ -896,7 +916,8 @@ class IMDbBase:
|
||||||
imdbID = aSystem.get_imdbMovieID(mop.movieID)
|
imdbID = aSystem.get_imdbMovieID(mop.movieID)
|
||||||
else:
|
else:
|
||||||
imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
|
imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
|
||||||
ptdf=1))
|
ptdf=0, appendKind=False),
|
||||||
|
mop['kind'])
|
||||||
elif isinstance(mop, Person.Person):
|
elif isinstance(mop, Person.Person):
|
||||||
if mop.personID is not None:
|
if mop.personID is not None:
|
||||||
imdbID = aSystem.get_imdbPersonID(mop.personID)
|
imdbID = aSystem.get_imdbPersonID(mop.personID)
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
|
|
||||||
[imdbpy]
|
[imdbpy]
|
||||||
## Default.
|
## Default.
|
||||||
accessSystem = mobile
|
accessSystem = http
|
||||||
|
|
||||||
## Optional (options common to every data access system):
|
## Optional (options common to every data access system):
|
||||||
# Activate adult searches (on, by default).
|
# Activate adult searches (on, by default).
|
||||||
|
@ -37,7 +37,7 @@ accessSystem = mobile
|
||||||
# Number of results for searches (20 by default).
|
# Number of results for searches (20 by default).
|
||||||
#results = 20
|
#results = 20
|
||||||
# Re-raise all caught exceptions (off, by default).
|
# Re-raise all caught exceptions (off, by default).
|
||||||
reraiseExceptions = on
|
#reraiseExceptions = off
|
||||||
|
|
||||||
## Optional (options common to http and mobile data access systems):
|
## Optional (options common to http and mobile data access systems):
|
||||||
# Proxy used to access the network. If it requires authentication,
|
# Proxy used to access the network. If it requires authentication,
|
||||||
|
@ -69,7 +69,7 @@ reraiseExceptions = on
|
||||||
## Set the threshold for logging messages.
|
## Set the threshold for logging messages.
|
||||||
# Can be one of "debug", "info", "warning", "error", "critical" (default:
|
# Can be one of "debug", "info", "warning", "error", "critical" (default:
|
||||||
# "warning").
|
# "warning").
|
||||||
loggingLevel = info
|
#loggingLevel = debug
|
||||||
|
|
||||||
## Path to a configuration file for the logging facility;
|
## Path to a configuration file for the logging facility;
|
||||||
# see: http://docs.python.org/library/logging.html#configuring-logging
|
# see: http://docs.python.org/library/logging.html#configuring-logging
|
||||||
|
|
|
@ -64,8 +64,10 @@ LANG_ARTICLES = {
|
||||||
'English': ('the', 'a', 'an'),
|
'English': ('the', 'a', 'an'),
|
||||||
'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
|
'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
|
||||||
'uno'),
|
'uno'),
|
||||||
'Spanish': ('la', 'le', 'el', 'les', 'un', 'los', 'una', 'uno', 'unos',
|
'Spanish': ('la', 'lo', 'el', 'las', 'un', 'los', 'una', 'al', 'del',
|
||||||
'unas'),
|
'unos', 'unas', 'uno'),
|
||||||
|
'French': ('le', "l'", 'la', 'les', 'un', 'une', 'des', 'au', 'du', '\xc3\xa0 la',
|
||||||
|
'de la', 'aux'),
|
||||||
'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
|
'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
|
||||||
'Turkish': (), # Some languages doesn't have articles.
|
'Turkish': (), # Some languages doesn't have articles.
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
generatepot.py script.
|
generatepot.py script.
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: iso-8859-1 -*-
|
||||||
"""Generate binary message catalog from textual translation description.
|
"""Generate binary message catalog from textual translation description.
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
rebuildmo.py script.
|
rebuildmo.py script.
|
||||||
|
|
||||||
|
|
|
@ -104,15 +104,24 @@ PY_VERSION = sys.version_info[:2]
|
||||||
# The cookies for the "adult" search.
|
# The cookies for the "adult" search.
|
||||||
# Please don't mess with these accounts.
|
# Please don't mess with these accounts.
|
||||||
# Old 'IMDbPY' account.
|
# Old 'IMDbPY' account.
|
||||||
_old_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
|
_IMDbPY_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
|
||||||
_old_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
|
_IMDbPY_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
|
||||||
# New 'IMDbPYweb' account.
|
# 'imdbpy2010' account.
|
||||||
_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
|
_imdbpy2010_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
|
||||||
_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
|
_imdbpy2010_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
|
||||||
|
# old 'IMDbPYweb' account.
|
||||||
|
_old_IMDbPYweb_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
|
||||||
|
_old_IMDbPYweb_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
|
||||||
|
# old 'IMDbPYweb' account values (as of 2012-12-30)
|
||||||
|
_IMDbPYweb_cookie_id = 'BCYjtpb46Go0cMHAMewWZEauhwqPL7ASCPpPVNutu6BuayHZd0U6Dk3UAqVlEM8DHLDsSr02RGQn5ff3245-R4A130NAWJ_5yqXx7X-zJey8vQM8JKdv3rTUSEJznJQlojUW1Bije-Q0FXAixs4I0sePWhd_tA41i-9AF2q3lPmaksram6ilMhN9i3IPESW1PMbk'
|
||||||
|
_IMDbPYweb_cookie_uu = 'BCYttQjEMc-NyUdFUGxThidAnBo7wwalEzj4un9uzf2XoEjtqDhNfrH7bOSuwlRkMEQ11SNyTajl-b9Q-21m4HwYu0e3jXZrjYLXLYzFkrEroCDyUREqaTwPJPSjGtFmvlaVBZEZmsWpaxe18DT5KiygKyGPZKH78Xu4im6ba-Sd31WvbXHzP8KGXPpGjhhVuv7Dcv314HCWkE832Srf9ya-Uv0FdGAmYyLbIAXuxnvpYQd6oZ8-CYkSGLIqcKWdrf5S'
|
||||||
|
# 'IMDbPY2013' account
|
||||||
|
_IMDbPY2013_cookie_id = 'BCYmoyqSm2WglmOzG-SrFWSvVpxsTZOB0qEOOqmAwCBxCbaNgKOxd0DTKzUvt7t04Pya5gV2tUrpDmYxrc1Dr54DQj2UXI7QI35__M5-HI2KrbOI3PjDz6M-_U3HG8topMfN64R24tmBixoZhMYXVaEc556lf0Z4gQNJVYRANXvwytP5v1lpfeToRlu9aVJwN4kT'
|
||||||
|
_IMDbPY2013_cookie_uu = 'BCYquDS8Y2i8R1pJxS4nB77YrhjHHXeOea2Xl9KtZvE6RZKVfMvzTGU4Vl5-yxfPbgRSiFJasyf-hhPuVvXyaHlfeBjNlbFT8hz2HzFFkQ_SxKxq05J51gi7Fv4SaAws1M-i7zmQ1TRunfJqCVIYqPwIs2NO7s4_YDH2ZoISVGLgca8OY2K58HychOZB1oRWHVeAJNhLJMrCWJBuGRLCNnQK5X9tA0dPPntr2Ussy0ouul-N1GQz-8y5vda3JJ_C6xkwmHcA6JrOdOFO_HqMWjVSXuxGEdrXC919JM9H0vooVvKeVgAEJnTh2GiVlUJUoH3c'
|
||||||
|
|
||||||
# imdbpy2010 account.
|
# Currently used account.
|
||||||
#_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
|
_cookie_id = _IMDbPY2013_cookie_id
|
||||||
#_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
|
_cookie_uu = _IMDbPY2013_cookie_uu
|
||||||
|
|
||||||
|
|
||||||
class _FakeURLOpener(object):
|
class _FakeURLOpener(object):
|
||||||
|
@ -141,9 +150,10 @@ class IMDbURLopener(FancyURLopener):
|
||||||
for header in ('User-Agent', 'User-agent', 'user-agent'):
|
for header in ('User-Agent', 'User-agent', 'user-agent'):
|
||||||
self.del_header(header)
|
self.del_header(header)
|
||||||
self.set_header('User-Agent', 'Mozilla/5.0')
|
self.set_header('User-Agent', 'Mozilla/5.0')
|
||||||
|
self.set_header('Accept-Language', 'en-us,en;q=0.5')
|
||||||
# XXX: This class is used also to perform "Exact Primary
|
# XXX: This class is used also to perform "Exact Primary
|
||||||
# [Title|Name]" searches, and so by default the cookie is set.
|
# [Title|Name]" searches, and so by default the cookie is set.
|
||||||
c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu)
|
c_header = 'uu=%s; id=%s' % (_cookie_uu, _cookie_id)
|
||||||
self.set_header('Cookie', c_header)
|
self.set_header('Cookie', c_header)
|
||||||
|
|
||||||
def get_proxy(self):
|
def get_proxy(self):
|
||||||
|
@ -199,12 +209,11 @@ class IMDbURLopener(FancyURLopener):
|
||||||
server_encode = uopener.info().getparam('charset')
|
server_encode = uopener.info().getparam('charset')
|
||||||
# Otherwise, look at the content-type HTML meta tag.
|
# Otherwise, look at the content-type HTML meta tag.
|
||||||
if server_encode is None and content:
|
if server_encode is None and content:
|
||||||
first_bytes = content[:512]
|
begin_h = content.find('text/html; charset=')
|
||||||
begin_h = first_bytes.find('text/html; charset=')
|
|
||||||
if begin_h != -1:
|
if begin_h != -1:
|
||||||
end_h = first_bytes[19+begin_h:].find('"')
|
end_h = content[19+begin_h:].find('"')
|
||||||
if end_h != -1:
|
if end_h != -1:
|
||||||
server_encode = first_bytes[19+begin_h:19+begin_h+end_h]
|
server_encode = content[19+begin_h:19+begin_h+end_h]
|
||||||
if server_encode:
|
if server_encode:
|
||||||
try:
|
try:
|
||||||
if lookup(server_encode):
|
if lookup(server_encode):
|
||||||
|
@ -455,16 +464,16 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||||
results is the maximum number of results to be retrieved."""
|
results is the maximum number of results to be retrieved."""
|
||||||
if isinstance(ton, unicode):
|
if isinstance(ton, unicode):
|
||||||
try:
|
try:
|
||||||
ton = ton.encode('iso8859-1')
|
ton = ton.encode('utf-8')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
try:
|
try:
|
||||||
ton = ton.encode('utf-8')
|
ton = ton.encode('iso8859-1')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
pass
|
pass
|
||||||
##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
|
##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
|
||||||
params = 'q=%s;s=%s;mx=%s' % (quote_plus(ton), kind, str(results))
|
params = 'q=%s&s=%s&mx=%s' % (quote_plus(ton), kind, str(results))
|
||||||
if kind == 'ep':
|
if kind == 'ep':
|
||||||
params = params.replace('s=ep;', 's=tt;ttype=ep;', 1)
|
params = params.replace('s=ep&', 's=tt&ttype=ep&', 1)
|
||||||
cont = self._retrieve(self.urls['find'] % params)
|
cont = self._retrieve(self.urls['find'] % params)
|
||||||
#print 'URL:', imdbURL_find % params
|
#print 'URL:', imdbURL_find % params
|
||||||
if cont.find('Your search returned more than') == -1 or \
|
if cont.find('Your search returned more than') == -1 or \
|
||||||
|
@ -472,7 +481,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||||
return cont
|
return cont
|
||||||
# The retrieved page contains no results, because too many
|
# The retrieved page contains no results, because too many
|
||||||
# titles or names contain the string we're looking for.
|
# titles or names contain the string we're looking for.
|
||||||
params = 'q=%s;ls=%s;lm=0' % (quote_plus(ton), kind)
|
params = 'q=%s&ls=%s&lm=0' % (quote_plus(ton), kind)
|
||||||
size = 131072 + results * 512
|
size = 131072 + results * 512
|
||||||
return self._retrieve(self.urls['find'] % params, size=size)
|
return self._retrieve(self.urls['find'] % params, size=size)
|
||||||
|
|
||||||
|
@ -587,6 +596,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||||
cont = self._retrieve(self.urls['movie_main'] % movieID + 'recommendations')
|
cont = self._retrieve(self.urls['movie_main'] % movieID + 'recommendations')
|
||||||
return self.mProxy.rec_parser.parse(cont)
|
return self.mProxy.rec_parser.parse(cont)
|
||||||
|
|
||||||
|
def get_movie_critic_reviews(self, movieID):
|
||||||
|
cont = self._retrieve(self.urls['movie_main'] % movieID + 'criticreviews')
|
||||||
|
return self.mProxy.criticrev_parser.parse(cont)
|
||||||
|
|
||||||
def get_movie_external_reviews(self, movieID):
|
def get_movie_external_reviews(self, movieID):
|
||||||
cont = self._retrieve(self.urls['movie_main'] % movieID + 'externalreviews')
|
cont = self._retrieve(self.urls['movie_main'] % movieID + 'externalreviews')
|
||||||
return self.mProxy.externalrev_parser.parse(cont)
|
return self.mProxy.externalrev_parser.parse(cont)
|
||||||
|
@ -754,7 +767,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||||
return self.pProxy.person_keywords_parser.parse(cont)
|
return self.pProxy.person_keywords_parser.parse(cont)
|
||||||
|
|
||||||
def _search_character(self, name, results):
|
def _search_character(self, name, results):
|
||||||
cont = self._get_search_content('char', name, results)
|
cont = self._get_search_content('ch', name, results)
|
||||||
return self.scProxy.search_character_parser.parse(cont, results=results)['data']
|
return self.scProxy.search_character_parser.parse(cont, results=results)['data']
|
||||||
|
|
||||||
def get_character_main(self, characterID):
|
def get_character_main(self, characterID):
|
||||||
|
|
|
@ -9,7 +9,7 @@ pages would be:
|
||||||
plot summary: http://akas.imdb.com/title/tt0094226/plotsummary
|
plot summary: http://akas.imdb.com/title/tt0094226/plotsummary
|
||||||
...and so on...
|
...and so on...
|
||||||
|
|
||||||
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -531,9 +531,6 @@ class DOMHTMLMovieParser(DOMParserBase):
|
||||||
def _process_plotsummary(x):
|
def _process_plotsummary(x):
|
||||||
"""Process a plot (contributed by Rdian06)."""
|
"""Process a plot (contributed by Rdian06)."""
|
||||||
xauthor = x.get('author')
|
xauthor = x.get('author')
|
||||||
if xauthor:
|
|
||||||
xauthor = xauthor.replace('{', '<').replace('}', '>').replace('(',
|
|
||||||
'<').replace(')', '>').strip()
|
|
||||||
xplot = x.get('plot', u'').strip()
|
xplot = x.get('plot', u'').strip()
|
||||||
if xauthor:
|
if xauthor:
|
||||||
xplot += u'::%s' % xauthor
|
xplot += u'::%s' % xauthor
|
||||||
|
@ -555,17 +552,20 @@ class DOMHTMLPlotParser(DOMParserBase):
|
||||||
# Notice that recently IMDb started to put the email of the
|
# Notice that recently IMDb started to put the email of the
|
||||||
# author only in the link, that we're not collecting, here.
|
# author only in the link, that we're not collecting, here.
|
||||||
extractors = [Extractor(label='plot',
|
extractors = [Extractor(label='plot',
|
||||||
path="//p[@class='plotpar']",
|
path="//ul[@class='zebraList']//p",
|
||||||
attrs=Attribute(key='plot',
|
attrs=Attribute(key='plot',
|
||||||
multi=True,
|
multi=True,
|
||||||
path={'plot': './text()',
|
path={'plot': './text()[1]',
|
||||||
'author': './i/a/text()'},
|
'author': './span/em/a/text()'},
|
||||||
postprocess=_process_plotsummary))]
|
postprocess=_process_plotsummary))]
|
||||||
|
|
||||||
|
|
||||||
def _process_award(x):
|
def _process_award(x):
|
||||||
award = {}
|
award = {}
|
||||||
award['award'] = x.get('award').strip()
|
_award = x.get('award')
|
||||||
|
if _award is not None:
|
||||||
|
_award = _award.strip()
|
||||||
|
award['award'] = _award
|
||||||
if not award['award']:
|
if not award['award']:
|
||||||
return {}
|
return {}
|
||||||
award['year'] = x.get('year').strip()
|
award['year'] = x.get('year').strip()
|
||||||
|
@ -709,10 +709,16 @@ class DOMHTMLTaglinesParser(DOMParserBase):
|
||||||
result = tparser.parse(taglines_html_string)
|
result = tparser.parse(taglines_html_string)
|
||||||
"""
|
"""
|
||||||
extractors = [Extractor(label='taglines',
|
extractors = [Extractor(label='taglines',
|
||||||
path="//div[@id='tn15content']/p",
|
path='//*[contains(concat(" ", normalize-space(@class), " "), " soda ")]',
|
||||||
attrs=Attribute(key='taglines', multi=True,
|
attrs=Attribute(key='taglines',
|
||||||
|
multi=True,
|
||||||
path="./text()"))]
|
path="./text()"))]
|
||||||
|
|
||||||
|
def postprocess_data(self, data):
|
||||||
|
if 'taglines' in data:
|
||||||
|
data['taglines'] = [tagline.strip() for tagline in data['taglines']]
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
class DOMHTMLKeywordsParser(DOMParserBase):
|
class DOMHTMLKeywordsParser(DOMParserBase):
|
||||||
"""Parser for the "keywords" page of a given movie.
|
"""Parser for the "keywords" page of a given movie.
|
||||||
|
@ -785,9 +791,9 @@ class DOMHTMLSoundtrackParser(DOMHTMLAlternateVersionsParser):
|
||||||
]
|
]
|
||||||
|
|
||||||
def postprocess_data(self, data):
|
def postprocess_data(self, data):
|
||||||
if 'soundtrack' in data:
|
if 'alternate versions' in data:
|
||||||
nd = []
|
nd = []
|
||||||
for x in data['soundtrack']:
|
for x in data['alternate versions']:
|
||||||
ds = x.split('\n')
|
ds = x.split('\n')
|
||||||
title = ds[0]
|
title = ds[0]
|
||||||
if title[0] == '"' and title[-1] == '"':
|
if title[0] == '"' and title[-1] == '"':
|
||||||
|
@ -846,6 +852,13 @@ class DOMHTMLCrazyCreditsParser(DOMParserBase):
|
||||||
x.replace('\n', ' ').replace(' ', ' ')))]
|
x.replace('\n', ' ').replace(' ', ' ')))]
|
||||||
|
|
||||||
|
|
||||||
|
def _process_goof(x):
|
||||||
|
if x['spoiler_category']:
|
||||||
|
return x['spoiler_category'].strip() + ': SPOILER: ' + x['text'].strip()
|
||||||
|
else:
|
||||||
|
return x['category'].strip() + ': ' + x['text'].strip()
|
||||||
|
|
||||||
|
|
||||||
class DOMHTMLGoofsParser(DOMParserBase):
|
class DOMHTMLGoofsParser(DOMParserBase):
|
||||||
"""Parser for the "goofs" page of a given movie.
|
"""Parser for the "goofs" page of a given movie.
|
||||||
The page should be provided as a string, as taken from
|
The page should be provided as a string, as taken from
|
||||||
|
@ -858,9 +871,14 @@ class DOMHTMLGoofsParser(DOMParserBase):
|
||||||
"""
|
"""
|
||||||
_defGetRefs = True
|
_defGetRefs = True
|
||||||
|
|
||||||
extractors = [Extractor(label='goofs', path="//ul[@class='trivia']/li",
|
extractors = [Extractor(label='goofs', path="//div[@class='soda odd']",
|
||||||
attrs=Attribute(key='goofs', multi=True, path=".//text()",
|
attrs=Attribute(key='goofs', multi=True,
|
||||||
postprocess=lambda x: (x or u'').strip()))]
|
path={
|
||||||
|
'text':"./text()",
|
||||||
|
'category':'./preceding-sibling::h4[1]/text()',
|
||||||
|
'spoiler_category': './h4/text()'
|
||||||
|
},
|
||||||
|
postprocess=_process_goof))]
|
||||||
|
|
||||||
|
|
||||||
class DOMHTMLQuotesParser(DOMParserBase):
|
class DOMHTMLQuotesParser(DOMParserBase):
|
||||||
|
@ -876,9 +894,16 @@ class DOMHTMLQuotesParser(DOMParserBase):
|
||||||
_defGetRefs = True
|
_defGetRefs = True
|
||||||
|
|
||||||
extractors = [
|
extractors = [
|
||||||
Extractor(label='quotes',
|
Extractor(label='quotes_odd',
|
||||||
path="//div[@class='_imdbpy']",
|
path="//div[@class='quote soda odd']",
|
||||||
attrs=Attribute(key='quotes',
|
attrs=Attribute(key='quotes_odd',
|
||||||
|
multi=True,
|
||||||
|
path=".//text()",
|
||||||
|
postprocess=lambda x: x.strip().replace(' \n',
|
||||||
|
'::').replace('::\n', '::').replace('\n', ' '))),
|
||||||
|
Extractor(label='quotes_even',
|
||||||
|
path="//div[@class='quote soda even']",
|
||||||
|
attrs=Attribute(key='quotes_even',
|
||||||
multi=True,
|
multi=True,
|
||||||
path=".//text()",
|
path=".//text()",
|
||||||
postprocess=lambda x: x.strip().replace(' \n',
|
postprocess=lambda x: x.strip().replace(' \n',
|
||||||
|
@ -886,27 +911,23 @@ class DOMHTMLQuotesParser(DOMParserBase):
|
||||||
]
|
]
|
||||||
|
|
||||||
preprocessors = [
|
preprocessors = [
|
||||||
(re.compile('(<a name="?qt[0-9]{7}"?></a>)', re.I),
|
(re.compile('<a href="#" class="hidesoda hidden">Hide options</a><br>', re.I), '')
|
||||||
r'\1<div class="_imdbpy">'),
|
]
|
||||||
(re.compile('<hr width="30%">', re.I), '</div>'),
|
|
||||||
(re.compile('<hr/>', re.I), '</div>'),
|
|
||||||
(re.compile('<script.*?</script>', re.I|re.S), ''),
|
|
||||||
# For BeautifulSoup.
|
|
||||||
(re.compile('<!-- sid: t-channel : MIDDLE_CENTER -->', re.I), '</div>')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_dom(self, dom):
|
def preprocess_dom(self, dom):
|
||||||
# Remove "link this quote" links.
|
# Remove "link this quote" links.
|
||||||
for qLink in self.xpath(dom, "//p[@class='linksoda']"):
|
for qLink in self.xpath(dom, "//span[@class='linksoda']"):
|
||||||
|
qLink.drop_tree()
|
||||||
|
for qLink in self.xpath(dom, "//div[@class='sharesoda_pre']"):
|
||||||
qLink.drop_tree()
|
qLink.drop_tree()
|
||||||
return dom
|
return dom
|
||||||
|
|
||||||
def postprocess_data(self, data):
|
def postprocess_data(self, data):
|
||||||
if 'quotes' not in data:
|
quotes = data.get('quotes_odd', []) + data.get('quotes_even', [])
|
||||||
|
if not quotes:
|
||||||
return {}
|
return {}
|
||||||
for idx, quote in enumerate(data['quotes']):
|
quotes = [q.split('::') for q in quotes]
|
||||||
data['quotes'][idx] = quote.split('::')
|
return {'quotes': quotes}
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
class DOMHTMLReleaseinfoParser(DOMParserBase):
|
class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||||
|
@ -920,13 +941,13 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||||
result = rdparser.parse(releaseinfo_html_string)
|
result = rdparser.parse(releaseinfo_html_string)
|
||||||
"""
|
"""
|
||||||
extractors = [Extractor(label='release dates',
|
extractors = [Extractor(label='release dates',
|
||||||
path="//th[@class='xxxx']/../../tr",
|
path="//table[@id='release_dates']//tr",
|
||||||
attrs=Attribute(key='release dates', multi=True,
|
attrs=Attribute(key='release dates', multi=True,
|
||||||
path={'country': ".//td[1]//text()",
|
path={'country': ".//td[1]//text()",
|
||||||
'date': ".//td[2]//text()",
|
'date': ".//td[2]//text()",
|
||||||
'notes': ".//td[3]//text()"})),
|
'notes': ".//td[3]//text()"})),
|
||||||
Extractor(label='akas',
|
Extractor(label='akas',
|
||||||
path="//div[@class='_imdbpy_akas']/table/tr",
|
path="//table[@id='akas']//tr",
|
||||||
attrs=Attribute(key='akas', multi=True,
|
attrs=Attribute(key='akas', multi=True,
|
||||||
path={'title': "./td[1]/text()",
|
path={'title': "./td[1]/text()",
|
||||||
'countries': "./td[2]/text()"}))]
|
'countries': "./td[2]/text()"}))]
|
||||||
|
@ -961,7 +982,7 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||||
title = (aka.get('title') or '').strip()
|
title = (aka.get('title') or '').strip()
|
||||||
if not title:
|
if not title:
|
||||||
continue
|
continue
|
||||||
countries = (aka.get('countries') or '').split('/')
|
countries = (aka.get('countries') or '').split(',')
|
||||||
if not countries:
|
if not countries:
|
||||||
nakas.append(title)
|
nakas.append(title)
|
||||||
else:
|
else:
|
||||||
|
@ -1135,7 +1156,28 @@ def _normalize_href(href):
|
||||||
href = '%s%s' % (imdbURL_base, href)
|
href = '%s%s' % (imdbURL_base, href)
|
||||||
return href
|
return href
|
||||||
|
|
||||||
|
class DOMHTMLCriticReviewsParser(DOMParserBase):
|
||||||
|
"""Parser for the "critic reviews" pages of a given movie.
|
||||||
|
The page should be provided as a string, as taken from
|
||||||
|
the akas.imdb.com server. The final result will be a
|
||||||
|
dictionary, with a key for every relevant section.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
crparser = DOMHTMLCriticReviewsParser()
|
||||||
|
result = crparser.parse(criticreviews_html_string)
|
||||||
|
"""
|
||||||
|
kind = 'critic reviews'
|
||||||
|
|
||||||
|
extractors = [
|
||||||
|
Extractor(label='metascore',
|
||||||
|
path="//div[@class='metascore_wrap']/div/span",
|
||||||
|
attrs=Attribute(key='metascore',
|
||||||
|
path=".//text()")),
|
||||||
|
Extractor(label='metacritic url',
|
||||||
|
path="//div[@class='article']/div[@class='see-more']/a",
|
||||||
|
attrs=Attribute(key='metacritic url',
|
||||||
|
path="./@href")) ]
|
||||||
|
|
||||||
class DOMHTMLOfficialsitesParser(DOMParserBase):
|
class DOMHTMLOfficialsitesParser(DOMParserBase):
|
||||||
"""Parser for the "official sites", "external reviews", "newsgroup
|
"""Parser for the "official sites", "external reviews", "newsgroup
|
||||||
reviews", "miscellaneous links", "sound clips", "video clips" and
|
reviews", "miscellaneous links", "sound clips", "video clips" and
|
||||||
|
@ -1471,6 +1513,14 @@ class DOMHTMLSeasonEpisodesParser(DOMParserBase):
|
||||||
try: selected_season = int(selected_season)
|
try: selected_season = int(selected_season)
|
||||||
except: pass
|
except: pass
|
||||||
nd = {selected_season: {}}
|
nd = {selected_season: {}}
|
||||||
|
if 'episode -1' in data:
|
||||||
|
counter = 1
|
||||||
|
for episode in data['episode -1']:
|
||||||
|
while 'episode %d' % counter in data:
|
||||||
|
counter += 1
|
||||||
|
k = 'episode %d' % counter
|
||||||
|
data[k] = [episode]
|
||||||
|
del data['episode -1']
|
||||||
for episode_nr, episode in data.iteritems():
|
for episode_nr, episode in data.iteritems():
|
||||||
if not (episode and episode[0] and
|
if not (episode and episode[0] and
|
||||||
episode_nr.startswith('episode ')):
|
episode_nr.startswith('episode ')):
|
||||||
|
@ -1860,6 +1910,8 @@ _OBJECTS = {
|
||||||
'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
|
'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
|
||||||
'ratings_parser': ((DOMHTMLRatingsParser,), None),
|
'ratings_parser': ((DOMHTMLRatingsParser,), None),
|
||||||
'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
|
'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
|
||||||
|
'criticrev_parser': ((DOMHTMLCriticReviewsParser,),
|
||||||
|
{'kind': 'critic reviews'}),
|
||||||
'externalrev_parser': ((DOMHTMLOfficialsitesParser,),
|
'externalrev_parser': ((DOMHTMLOfficialsitesParser,),
|
||||||
{'kind': 'external reviews'}),
|
{'kind': 'external reviews'}),
|
||||||
'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,),
|
'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,),
|
||||||
|
|
|
@ -8,7 +8,7 @@ E.g., for "Mel Gibson" the referred pages would be:
|
||||||
biography: http://akas.imdb.com/name/nm0000154/bio
|
biography: http://akas.imdb.com/name/nm0000154/bio
|
||||||
...and so on...
|
...and so on...
|
||||||
|
|
||||||
Copyright 2004-20101 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -60,6 +60,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||||
result = cparser.parse(categorized_html_string)
|
result = cparser.parse(categorized_html_string)
|
||||||
"""
|
"""
|
||||||
_containsObjects = True
|
_containsObjects = True
|
||||||
|
_name_imdb_index = re.compile(r'\([IVXLCDM]+\)')
|
||||||
|
|
||||||
_birth_attrs = [Attribute(key='birth date',
|
_birth_attrs = [Attribute(key='birth date',
|
||||||
path='.//time[@itemprop="birthDate"]/@datetime'),
|
path='.//time[@itemprop="birthDate"]/@datetime'),
|
||||||
|
@ -100,6 +101,10 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||||
path=".//text()",
|
path=".//text()",
|
||||||
postprocess=lambda x: analyze_name(x,
|
postprocess=lambda x: analyze_name(x,
|
||||||
canonical=1))),
|
canonical=1))),
|
||||||
|
Extractor(label='name_index',
|
||||||
|
path="//h1[@class='header']/span[1]",
|
||||||
|
attrs=Attribute(key='name_index',
|
||||||
|
path="./text()")),
|
||||||
|
|
||||||
Extractor(label='birth info',
|
Extractor(label='birth info',
|
||||||
path="//div[h4='Born:']",
|
path="//div[h4='Born:']",
|
||||||
|
@ -110,7 +115,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||||
attrs=_death_attrs),
|
attrs=_death_attrs),
|
||||||
|
|
||||||
Extractor(label='headshot',
|
Extractor(label='headshot',
|
||||||
path="//td[@id='img_primary']/a",
|
path="//td[@id='img_primary']/div[@class='image']/a",
|
||||||
attrs=Attribute(key='headshot',
|
attrs=Attribute(key='headshot',
|
||||||
path="./img/@src")),
|
path="./img/@src")),
|
||||||
|
|
||||||
|
@ -152,6 +157,11 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||||
for what in 'birth date', 'death date':
|
for what in 'birth date', 'death date':
|
||||||
if what in data and not data[what]:
|
if what in data and not data[what]:
|
||||||
del data[what]
|
del data[what]
|
||||||
|
name_index = (data.get('name_index') or '').strip()
|
||||||
|
if name_index:
|
||||||
|
if self._name_imdb_index.match(name_index):
|
||||||
|
data['imdbIndex'] = name_index[1:-1]
|
||||||
|
del data['name_index']
|
||||||
# XXX: the code below is for backwards compatibility
|
# XXX: the code below is for backwards compatibility
|
||||||
# probably could be removed
|
# probably could be removed
|
||||||
for key in data.keys():
|
for key in data.keys():
|
||||||
|
@ -220,13 +230,13 @@ class DOMHTMLBioParser(DOMParserBase):
|
||||||
attrs=Attribute(key='headshot',
|
attrs=Attribute(key='headshot',
|
||||||
path="./img/@src")),
|
path="./img/@src")),
|
||||||
Extractor(label='birth info',
|
Extractor(label='birth info',
|
||||||
path="//div[h5='Date of Birth']",
|
path="//table[@id='overviewTable']//td[text()='Date of Birth']/following-sibling::td[1]",
|
||||||
attrs=_birth_attrs),
|
attrs=_birth_attrs),
|
||||||
Extractor(label='death info',
|
Extractor(label='death info',
|
||||||
path="//div[h5='Date of Death']",
|
path="//table[@id='overviewTable']//td[text()='Date of Death']/following-sibling::td[1]",
|
||||||
attrs=_death_attrs),
|
attrs=_death_attrs),
|
||||||
Extractor(label='nick names',
|
Extractor(label='nick names',
|
||||||
path="//div[h5='Nickname']",
|
path="//table[@id='overviewTable']//td[text()='Nickenames']/following-sibling::td[1]",
|
||||||
attrs=Attribute(key='nick names',
|
attrs=Attribute(key='nick names',
|
||||||
path="./text()",
|
path="./text()",
|
||||||
joiner='|',
|
joiner='|',
|
||||||
|
@ -234,25 +244,25 @@ class DOMHTMLBioParser(DOMParserBase):
|
||||||
'::(', 1) for n in x.split('|')
|
'::(', 1) for n in x.split('|')
|
||||||
if n.strip()])),
|
if n.strip()])),
|
||||||
Extractor(label='birth name',
|
Extractor(label='birth name',
|
||||||
path="//div[h5='Birth Name']",
|
path="//table[@id='overviewTable']//td[text()='Birth Name']/following-sibling::td[1]",
|
||||||
attrs=Attribute(key='birth name',
|
attrs=Attribute(key='birth name',
|
||||||
path="./text()",
|
path="./text()",
|
||||||
postprocess=lambda x: canonicalName(x.strip()))),
|
postprocess=lambda x: canonicalName(x.strip()))),
|
||||||
Extractor(label='height',
|
Extractor(label='height',
|
||||||
path="//div[h5='Height']",
|
path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
|
||||||
attrs=Attribute(key='height',
|
attrs=Attribute(key='height',
|
||||||
path="./text()",
|
path="./text()",
|
||||||
postprocess=lambda x: x.strip())),
|
postprocess=lambda x: x.strip())),
|
||||||
Extractor(label='mini biography',
|
Extractor(label='mini biography',
|
||||||
path="//div[h5='Mini Biography']",
|
path="//a[@name='mini_bio']/following-sibling::div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
|
||||||
attrs=Attribute(key='mini biography',
|
attrs=Attribute(key='mini biography',
|
||||||
multi=True,
|
multi=True,
|
||||||
path={
|
path={
|
||||||
'bio': "./p//text()",
|
'bio': ".//text()",
|
||||||
'by': "./b/following-sibling::a/text()"
|
'by': ".//a[@name='ba']//text()"
|
||||||
},
|
},
|
||||||
postprocess=lambda x: "%s::%s" % \
|
postprocess=lambda x: "%s::%s" % \
|
||||||
(x.get('bio').strip(),
|
((x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
|
||||||
(x.get('by') or u'').strip() or u'Anonymous'))),
|
(x.get('by') or u'').strip() or u'Anonymous'))),
|
||||||
Extractor(label='spouse',
|
Extractor(label='spouse',
|
||||||
path="//div[h5='Spouse']/table/tr",
|
path="//div[h5='Spouse']/table/tr",
|
||||||
|
|
|
@ -5,9 +5,9 @@ This module provides the HTMLSearchCharacterParser class (and the
|
||||||
search_character_parser instance), used to parse the results of a search
|
search_character_parser instance), used to parse the results of a search
|
||||||
for a given character.
|
for a given character.
|
||||||
E.g., when searching for the name "Jesse James", the parsed page would be:
|
E.g., when searching for the name "Jesse James", the parsed page would be:
|
||||||
http://akas.imdb.com/find?s=Characters;mx=20;q=Jesse+James
|
http://akas.imdb.com/find?s=ch;mx=20;q=Jesse+James
|
||||||
|
|
||||||
Copyright 2007-2009 Davide Alberani <da@erlug.linux.it>
|
Copyright 2007-2012 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -42,7 +42,7 @@ class DOMBasicCharacterParser(DOMBasicMovieParser):
|
||||||
|
|
||||||
class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
|
class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
|
||||||
_BaseParser = DOMBasicCharacterParser
|
_BaseParser = DOMBasicCharacterParser
|
||||||
_notDirectHitTitle = '<title>imdb search'
|
_notDirectHitTitle = '<title>find - imdb'
|
||||||
_titleBuilder = lambda self, x: build_name(x, canonical=False)
|
_titleBuilder = lambda self, x: build_name(x, canonical=False)
|
||||||
_linkPrefix = '/character/ch'
|
_linkPrefix = '/character/ch'
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
|
||||||
{'name': x.get('name')}
|
{'name': x.get('name')}
|
||||||
))]
|
))]
|
||||||
extractors = [Extractor(label='search',
|
extractors = [Extractor(label='search',
|
||||||
path="//td[3]/a[starts-with(@href, " \
|
path="//td[@class='result_text']/a[starts-with(@href, " \
|
||||||
"'/character/ch')]/..",
|
"'/character/ch')]/..",
|
||||||
attrs=_attrs)]
|
attrs=_attrs)]
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ for a given company.
|
||||||
E.g., when searching for the name "Columbia Pictures", the parsed page would be:
|
E.g., when searching for the name "Columbia Pictures", the parsed page would be:
|
||||||
http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
|
http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
|
||||||
|
|
||||||
Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
|
Copyright 2008-2012 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -42,7 +42,7 @@ class DOMBasicCompanyParser(DOMBasicMovieParser):
|
||||||
|
|
||||||
class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
|
class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
|
||||||
_BaseParser = DOMBasicCompanyParser
|
_BaseParser = DOMBasicCompanyParser
|
||||||
_notDirectHitTitle = '<title>imdb company'
|
_notDirectHitTitle = '<title>find - imdb'
|
||||||
_titleBuilder = lambda self, x: build_company_name(x)
|
_titleBuilder = lambda self, x: build_company_name(x)
|
||||||
_linkPrefix = '/company/co'
|
_linkPrefix = '/company/co'
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
|
||||||
or u''), stripNotes=True)
|
or u''), stripNotes=True)
|
||||||
))]
|
))]
|
||||||
extractors = [Extractor(label='search',
|
extractors = [Extractor(label='search',
|
||||||
path="//td[3]/a[starts-with(@href, " \
|
path="//td[@class='result_text']/a[starts-with(@href, " \
|
||||||
"'/company/co')]/..",
|
"'/company/co')]/..",
|
||||||
attrs=_attrs)]
|
attrs=_attrs)]
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ E.g., for when searching for the title "the passion", the parsed
|
||||||
page would be:
|
page would be:
|
||||||
http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
|
http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
|
||||||
|
|
||||||
Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -77,7 +77,7 @@ class DOMBasicMovieParser(DOMParserBase):
|
||||||
def custom_analyze_title(title):
|
def custom_analyze_title(title):
|
||||||
"""Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
|
"""Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
|
||||||
# XXX: very crappy. :-(
|
# XXX: very crappy. :-(
|
||||||
nt = title.split(' ')[0]
|
nt = title.split(' aka ')[0]
|
||||||
if nt:
|
if nt:
|
||||||
title = nt
|
title = nt
|
||||||
if not title:
|
if not title:
|
||||||
|
@ -92,7 +92,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
"new search system" is used, for movies."""
|
"new search system" is used, for movies."""
|
||||||
|
|
||||||
_BaseParser = DOMBasicMovieParser
|
_BaseParser = DOMBasicMovieParser
|
||||||
_notDirectHitTitle = '<title>imdb title'
|
_notDirectHitTitle = '<title>find - imdb</title>'
|
||||||
_titleBuilder = lambda self, x: build_title(x)
|
_titleBuilder = lambda self, x: build_title(x)
|
||||||
_linkPrefix = '/title/tt'
|
_linkPrefix = '/title/tt'
|
||||||
|
|
||||||
|
@ -101,8 +101,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
path={
|
path={
|
||||||
'link': "./a[1]/@href",
|
'link': "./a[1]/@href",
|
||||||
'info': ".//text()",
|
'info': ".//text()",
|
||||||
#'akas': ".//div[@class='_imdbpyAKA']//text()"
|
'akas': "./i//text()"
|
||||||
'akas': ".//p[@class='find-aka']//text()"
|
|
||||||
},
|
},
|
||||||
postprocess=lambda x: (
|
postprocess=lambda x: (
|
||||||
analyze_imdbid(x.get('link') or u''),
|
analyze_imdbid(x.get('link') or u''),
|
||||||
|
@ -110,7 +109,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
x.get('akas')
|
x.get('akas')
|
||||||
))]
|
))]
|
||||||
extractors = [Extractor(label='search',
|
extractors = [Extractor(label='search',
|
||||||
path="//td[3]/a[starts-with(@href, '/title/tt')]/..",
|
path="//td[@class='result_text']",
|
||||||
attrs=_attrs)]
|
attrs=_attrs)]
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.url = u''
|
self.url = u''
|
||||||
|
@ -119,14 +118,11 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
self.url = u''
|
self.url = u''
|
||||||
|
|
||||||
def preprocess_string(self, html_string):
|
def preprocess_string(self, html_string):
|
||||||
if self._notDirectHitTitle in html_string[:1024].lower():
|
if self._notDirectHitTitle in html_string[:10240].lower():
|
||||||
if self._linkPrefix == '/title/tt':
|
if self._linkPrefix == '/title/tt':
|
||||||
# Only for movies.
|
# Only for movies.
|
||||||
|
# XXX (HTU): does this still apply?
|
||||||
html_string = html_string.replace('(TV mini-series)', '(mini)')
|
html_string = html_string.replace('(TV mini-series)', '(mini)')
|
||||||
html_string = html_string.replace('<p class="find-aka">',
|
|
||||||
'<p class="find-aka">::')
|
|
||||||
#html_string = _reAKAStitles.sub(
|
|
||||||
# r'<div class="_imdbpyAKA">\1::</div>\2', html_string)
|
|
||||||
return html_string
|
return html_string
|
||||||
# Direct hit!
|
# Direct hit!
|
||||||
dbme = self._BaseParser(useModule=self._useModule)
|
dbme = self._BaseParser(useModule=self._useModule)
|
||||||
|
@ -141,7 +137,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
title = self._titleBuilder(res[0][1])
|
title = self._titleBuilder(res[0][1])
|
||||||
if not (link and title): return u''
|
if not (link and title): return u''
|
||||||
link = link.replace('http://pro.imdb.com', '')
|
link = link.replace('http://pro.imdb.com', '')
|
||||||
new_html = '<td></td><td></td><td><a href="%s">%s</a></td>' % (link,
|
new_html = '<td class="result_text"><a href="%s">%s</a></td>' % (link,
|
||||||
title)
|
title)
|
||||||
return new_html
|
return new_html
|
||||||
|
|
||||||
|
@ -161,11 +157,14 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
if not datum[0] and datum[1]:
|
if not datum[0] and datum[1]:
|
||||||
continue
|
continue
|
||||||
if datum[2] is not None:
|
if datum[2] is not None:
|
||||||
akas = filter(None, datum[2].split('::'))
|
#akas = filter(None, datum[2].split('::'))
|
||||||
if self._linkPrefix == '/title/tt':
|
if self._linkPrefix == '/title/tt':
|
||||||
akas = [a.replace('" - ', '::').rstrip() for a in akas]
|
# XXX (HTU): couldn't find a result with multiple akas
|
||||||
akas = [a.replace('aka "', '', 1).replace('aka "',
|
aka = datum[2]
|
||||||
'', 1).lstrip() for a in akas]
|
akas = [aka[1:-1]] # remove the quotes
|
||||||
|
#akas = [a.replace('" - ', '::').rstrip() for a in akas]
|
||||||
|
#akas = [a.replace('aka "', '', 1).replace('aka "',
|
||||||
|
#'', 1).lstrip() for a in akas]
|
||||||
datum[1]['akas'] = akas
|
datum[1]['akas'] = akas
|
||||||
data['data'][idx] = (datum[0], datum[1])
|
data['data'][idx] = (datum[0], datum[1])
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -7,7 +7,7 @@ for a given person.
|
||||||
E.g., when searching for the name "Mel Gibson", the parsed page would be:
|
E.g., when searching for the name "Mel Gibson", the parsed page would be:
|
||||||
http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
|
http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
|
||||||
|
|
||||||
Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -55,7 +55,7 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
|
||||||
"""Parse the html page that the IMDb web server shows when the
|
"""Parse the html page that the IMDb web server shows when the
|
||||||
"new search system" is used, for persons."""
|
"new search system" is used, for persons."""
|
||||||
_BaseParser = DOMBasicPersonParser
|
_BaseParser = DOMBasicPersonParser
|
||||||
_notDirectHitTitle = '<title>imdb name'
|
_notDirectHitTitle = '<title>find - imdb'
|
||||||
_titleBuilder = lambda self, x: build_name(x, canonical=True)
|
_titleBuilder = lambda self, x: build_name(x, canonical=True)
|
||||||
_linkPrefix = '/name/nm'
|
_linkPrefix = '/name/nm'
|
||||||
|
|
||||||
|
@ -74,11 +74,11 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
|
||||||
canonical=1), x.get('akas')
|
canonical=1), x.get('akas')
|
||||||
))]
|
))]
|
||||||
extractors = [Extractor(label='search',
|
extractors = [Extractor(label='search',
|
||||||
path="//td[3]/a[starts-with(@href, '/name/nm')]/..",
|
path="//td[@class='result_text']/a[starts-with(@href, '/name/nm')]/..",
|
||||||
attrs=_attrs)]
|
attrs=_attrs)]
|
||||||
|
|
||||||
def preprocess_string(self, html_string):
|
def preprocess_string(self, html_string):
|
||||||
if self._notDirectHitTitle in html_string[:1024].lower():
|
if self._notDirectHitTitle in html_string[:10240].lower():
|
||||||
html_string = _reAKASp.sub(
|
html_string = _reAKASp.sub(
|
||||||
r'\1<div class="_imdbpyAKA">\2::</div>\3',
|
r'\1<div class="_imdbpyAKA">\2::</div>\3',
|
||||||
html_string)
|
html_string)
|
||||||
|
|
|
@ -340,7 +340,7 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
|
||||||
title = title[:nidx].rstrip()
|
title = title[:nidx].rstrip()
|
||||||
if year:
|
if year:
|
||||||
year = year.strip()
|
year = year.strip()
|
||||||
if title[-1] == ')':
|
if title[-1:] == ')':
|
||||||
fpIdx = title.rfind('(')
|
fpIdx = title.rfind('(')
|
||||||
if fpIdx != -1:
|
if fpIdx != -1:
|
||||||
if notes: notes = '%s %s' % (title[fpIdx:], notes)
|
if notes: notes = '%s %s' % (title[fpIdx:], notes)
|
||||||
|
|
|
@ -6,7 +6,7 @@ IMDb's data for mobile systems.
|
||||||
the imdb.IMDb function will return an instance of this class when
|
the imdb.IMDb function will return an instance of this class when
|
||||||
called with the 'accessSystem' argument set to "mobile".
|
called with the 'accessSystem' argument set to "mobile".
|
||||||
|
|
||||||
Copyright 2005-2011 Davide Alberani <da@erlug.linux.it>
|
Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -193,7 +193,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
title)
|
title)
|
||||||
return res
|
return res
|
||||||
tl = title[0].lower()
|
tl = title[0].lower()
|
||||||
if not tl.startswith('imdb title'):
|
if not tl.startswith('find - imdb'):
|
||||||
# a direct hit!
|
# a direct hit!
|
||||||
title = _unHtml(title[0])
|
title = _unHtml(title[0])
|
||||||
mid = None
|
mid = None
|
||||||
|
@ -211,7 +211,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
# XXX: this results*3 prevents some recursion errors, but...
|
# XXX: this results*3 prevents some recursion errors, but...
|
||||||
# it's not exactly understandable (i.e.: why 'results' is
|
# it's not exactly understandable (i.e.: why 'results' is
|
||||||
# not enough to get all the results?)
|
# not enough to get all the results?)
|
||||||
lis = _findBetween(cont, 'td valign="top">', '</td>',
|
lis = _findBetween(cont, 'td class="result_text">', '</td>',
|
||||||
maxRes=results*3)
|
maxRes=results*3)
|
||||||
for li in lis:
|
for li in lis:
|
||||||
akas = re_makas.findall(li)
|
akas = re_makas.findall(li)
|
||||||
|
@ -492,7 +492,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
self._mobile_logger.warn('no title tag searching for name %s', name)
|
self._mobile_logger.warn('no title tag searching for name %s', name)
|
||||||
return res
|
return res
|
||||||
nl = name[0].lower()
|
nl = name[0].lower()
|
||||||
if not nl.startswith('imdb name'):
|
if not nl.startswith('find - imdb'):
|
||||||
# a direct hit!
|
# a direct hit!
|
||||||
name = _unHtml(name[0])
|
name = _unHtml(name[0])
|
||||||
name = name.replace('- Filmography by type' , '').strip()
|
name = name.replace('- Filmography by type' , '').strip()
|
||||||
|
@ -506,7 +506,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
return res
|
return res
|
||||||
res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
|
res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
|
||||||
else:
|
else:
|
||||||
lis = _findBetween(cont, 'td valign="top">', '</td>',
|
lis = _findBetween(cont, 'td class="result_text">', '</td>',
|
||||||
maxRes=results*3)
|
maxRes=results*3)
|
||||||
for li in lis:
|
for li in lis:
|
||||||
akas = _findBetween(li, '<em>"', '"</em>')
|
akas = _findBetween(li, '<em>"', '"</em>')
|
||||||
|
@ -771,7 +771,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
return {'data': d}
|
return {'data': d}
|
||||||
|
|
||||||
def _search_character(self, name, results):
|
def _search_character(self, name, results):
|
||||||
cont = subXMLRefs(self._get_search_content('char', name, results))
|
cont = subXMLRefs(self._get_search_content('ch', name, results))
|
||||||
name = _findBetween(cont, '<title>', '</title>', maxRes=1)
|
name = _findBetween(cont, '<title>', '</title>', maxRes=1)
|
||||||
res = []
|
res = []
|
||||||
if not name:
|
if not name:
|
||||||
|
@ -779,8 +779,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
name)
|
name)
|
||||||
return res
|
return res
|
||||||
nl = name[0].lower()
|
nl = name[0].lower()
|
||||||
if not (nl.startswith('imdb search') or nl.startswith('imdb search') \
|
if not nl.startswith('find - imdb'):
|
||||||
or nl.startswith('imdb character')):
|
|
||||||
# a direct hit!
|
# a direct hit!
|
||||||
name = _unHtml(name[0]).replace('(Character)', '').strip()
|
name = _unHtml(name[0]).replace('(Character)', '').strip()
|
||||||
pid = None
|
pid = None
|
||||||
|
@ -793,23 +792,18 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
return res
|
return res
|
||||||
res[:] = [(str(pid[0]), analyze_name(name))]
|
res[:] = [(str(pid[0]), analyze_name(name))]
|
||||||
else:
|
else:
|
||||||
sects = _findBetween(cont, '<b>Popular Characters</b>', '</table>',
|
lis = _findBetween(cont, '<td class="result_text"',
|
||||||
maxRes=results*3)
|
['<small', '</td>', '<br'])
|
||||||
sects += _findBetween(cont, '<b>Characters', '</table>',
|
for li in lis:
|
||||||
maxRes=results*3)
|
li = '<%s' % li
|
||||||
for sect in sects:
|
pid = re_imdbID.findall(li)
|
||||||
lis = _findBetween(sect, '<a href="/character/',
|
pname = _unHtml(li)
|
||||||
['<small', '</td>', '<br'])
|
if not (pid and pname):
|
||||||
for li in lis:
|
self._mobile_logger.debug('no name/characterID' \
|
||||||
li = '<%s' % li
|
' parsing %s searching for' \
|
||||||
pid = re_imdbID.findall(li)
|
' character %s', li, name)
|
||||||
pname = _unHtml(li)
|
continue
|
||||||
if not (pid and pname):
|
res.append((str(pid[0]), analyze_name(pname)))
|
||||||
self._mobile_logger.debug('no name/characterID' \
|
|
||||||
' parsing %s searching for' \
|
|
||||||
' character %s', li, name)
|
|
||||||
continue
|
|
||||||
res.append((str(pid[0]), analyze_name(pname)))
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def get_character_main(self, characterID):
|
def get_character_main(self, characterID):
|
||||||
|
|
|
@ -7,7 +7,7 @@ the SQLObject _AND_ SQLAlchemy Object Relational Managers is available.
|
||||||
the imdb.IMDb function will return an instance of this class when
|
the imdb.IMDb function will return an instance of this class when
|
||||||
called with the 'accessSystem' argument set to "sql", "database" or "db".
|
called with the 'accessSystem' argument set to "sql", "database" or "db".
|
||||||
|
|
||||||
Copyright 2005-2010 Davide Alberani <da@erlug.linux.it>
|
Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -452,7 +452,12 @@ def get_movie_data(movieID, kindDict, fromAka=0, _table=None):
|
||||||
else:
|
else:
|
||||||
if not fromAka: Table = Title
|
if not fromAka: Table = Title
|
||||||
else: Table = AkaTitle
|
else: Table = AkaTitle
|
||||||
m = Table.get(movieID)
|
try:
|
||||||
|
m = Table.get(movieID)
|
||||||
|
except Exception, e:
|
||||||
|
_aux_logger.warn('Unable to fetch information for movieID %s: %s', movieID, e)
|
||||||
|
mdict = {}
|
||||||
|
return mdict
|
||||||
mdict = {'title': m.title, 'kind': kindDict[m.kindID],
|
mdict = {'title': m.title, 'kind': kindDict[m.kindID],
|
||||||
'year': m.productionYear, 'imdbIndex': m.imdbIndex,
|
'year': m.productionYear, 'imdbIndex': m.imdbIndex,
|
||||||
'season': m.seasonNr, 'episode': m.episodeNr}
|
'season': m.seasonNr, 'episode': m.episodeNr}
|
||||||
|
@ -825,14 +830,14 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
imdbID = movie.imdbID
|
imdbID = movie.imdbID
|
||||||
if imdbID is not None: return '%07d' % imdbID
|
if imdbID is not None: return '%07d' % imdbID
|
||||||
m_dict = get_movie_data(movie.id, self._kind)
|
m_dict = get_movie_data(movie.id, self._kind)
|
||||||
titline = build_title(m_dict, ptdf=1)
|
titline = build_title(m_dict, ptdf=0)
|
||||||
imdbID = self.title2imdbID(titline)
|
imdbID = self.title2imdbID(titline, m_dict['kind'])
|
||||||
# If the imdbID was retrieved from the web and was not in the
|
# If the imdbID was retrieved from the web and was not in the
|
||||||
# database, update the database (ignoring errors, because it's
|
# database, update the database (ignoring errors, because it's
|
||||||
# possible that the current user does not have update privileges).
|
# possible that the current user does not have update privileges).
|
||||||
# There're times when I think I'm a genius; this is one of
|
# There're times when I think I'm a genius; this is one of
|
||||||
# those times... <g>
|
# those times... <g>
|
||||||
if imdbID is not None:
|
if imdbID is not None and not isinstance(imdbID, list):
|
||||||
try: movie.imdbID = int(imdbID)
|
try: movie.imdbID = int(imdbID)
|
||||||
except: pass
|
except: pass
|
||||||
return imdbID
|
return imdbID
|
||||||
|
@ -847,9 +852,9 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
imdbID = person.imdbID
|
imdbID = person.imdbID
|
||||||
if imdbID is not None: return '%07d' % imdbID
|
if imdbID is not None: return '%07d' % imdbID
|
||||||
n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
|
n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
|
||||||
namline = build_name(n_dict, canonical=1)
|
namline = build_name(n_dict, canonical=False)
|
||||||
imdbID = self.name2imdbID(namline)
|
imdbID = self.name2imdbID(namline)
|
||||||
if imdbID is not None:
|
if imdbID is not None and not isinstance(imdbID, list):
|
||||||
try: person.imdbID = int(imdbID)
|
try: person.imdbID = int(imdbID)
|
||||||
except: pass
|
except: pass
|
||||||
return imdbID
|
return imdbID
|
||||||
|
@ -864,9 +869,9 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
imdbID = character.imdbID
|
imdbID = character.imdbID
|
||||||
if imdbID is not None: return '%07d' % imdbID
|
if imdbID is not None: return '%07d' % imdbID
|
||||||
n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
|
n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
|
||||||
namline = build_name(n_dict, canonical=1)
|
namline = build_name(n_dict, canonical=False)
|
||||||
imdbID = self.character2imdbID(namline)
|
imdbID = self.character2imdbID(namline)
|
||||||
if imdbID is not None:
|
if imdbID is not None and not isinstance(imdbID, list):
|
||||||
try: character.imdbID = int(imdbID)
|
try: character.imdbID = int(imdbID)
|
||||||
except: pass
|
except: pass
|
||||||
return imdbID
|
return imdbID
|
||||||
|
@ -883,7 +888,7 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
n_dict = {'name': company.name, 'country': company.countryCode}
|
n_dict = {'name': company.name, 'country': company.countryCode}
|
||||||
namline = build_company_name(n_dict)
|
namline = build_company_name(n_dict)
|
||||||
imdbID = self.company2imdbID(namline)
|
imdbID = self.company2imdbID(namline)
|
||||||
if imdbID is not None:
|
if imdbID is not None and not isinstance(imdbID, list):
|
||||||
try: company.imdbID = int(imdbID)
|
try: company.imdbID = int(imdbID)
|
||||||
except: pass
|
except: pass
|
||||||
return imdbID
|
return imdbID
|
||||||
|
@ -1116,8 +1121,9 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
if mlinks:
|
if mlinks:
|
||||||
for ml in mlinks:
|
for ml in mlinks:
|
||||||
lmovieData = get_movie_data(ml[0], self._kind)
|
lmovieData = get_movie_data(ml[0], self._kind)
|
||||||
m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
|
if lmovieData:
|
||||||
ml[0] = m
|
m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
|
||||||
|
ml[0] = m
|
||||||
res['connections'] = {}
|
res['connections'] = {}
|
||||||
mlinks[:] = _groupListBy(mlinks, 1)
|
mlinks[:] = _groupListBy(mlinks, 1)
|
||||||
for group in mlinks:
|
for group in mlinks:
|
||||||
|
|
|
@ -466,6 +466,7 @@ class _AlchemyConnection(object):
|
||||||
|
|
||||||
def setConnection(uri, tables, encoding='utf8', debug=False):
|
def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||||
"""Set connection for every table."""
|
"""Set connection for every table."""
|
||||||
|
params = {'encoding': encoding}
|
||||||
# FIXME: why on earth MySQL requires an additional parameter,
|
# FIXME: why on earth MySQL requires an additional parameter,
|
||||||
# is well beyond my understanding...
|
# is well beyond my understanding...
|
||||||
if uri.startswith('mysql'):
|
if uri.startswith('mysql'):
|
||||||
|
@ -474,7 +475,11 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||||
else:
|
else:
|
||||||
uri += '?'
|
uri += '?'
|
||||||
uri += 'charset=%s' % encoding
|
uri += 'charset=%s' % encoding
|
||||||
params = {'encoding': encoding}
|
|
||||||
|
# On some server configurations, we will need to explictly enable
|
||||||
|
# loading data from local files
|
||||||
|
params['local_infile'] = 1
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
params['echo'] = True
|
params['echo'] = True
|
||||||
if uri.startswith('ibm_db'):
|
if uri.startswith('ibm_db'):
|
||||||
|
|
Binary file not shown.
|
@ -182,6 +182,10 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||||
kw['use_unicode'] = 1
|
kw['use_unicode'] = 1
|
||||||
#kw['sqlobject_encoding'] = encoding
|
#kw['sqlobject_encoding'] = encoding
|
||||||
kw['charset'] = encoding
|
kw['charset'] = encoding
|
||||||
|
|
||||||
|
# On some server configurations, we will need to explictly enable
|
||||||
|
# loading data from local files
|
||||||
|
kw['local_infile'] = 1
|
||||||
conn = connectionForURI(uri, **kw)
|
conn = connectionForURI(uri, **kw)
|
||||||
conn.debug = debug
|
conn.debug = debug
|
||||||
# XXX: doesn't work and a work-around was put in imdbpy2sql.py;
|
# XXX: doesn't work and a work-around was put in imdbpy2sql.py;
|
||||||
|
|
|
@ -3,7 +3,7 @@ utils module (imdb package).
|
||||||
|
|
||||||
This module provides basic utilities for the imdb package.
|
This module provides basic utilities for the imdb package.
|
||||||
|
|
||||||
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2009 H. Turgut Uyar <uyar@tekir.org>
|
2009 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -189,10 +189,9 @@ _unicodeArticles = linguistics.toUnicode(_articles)
|
||||||
articlesDicts = linguistics.articlesDictsForLang(None)
|
articlesDicts = linguistics.articlesDictsForLang(None)
|
||||||
spArticles = linguistics.spArticlesForLang(None)
|
spArticles = linguistics.spArticlesForLang(None)
|
||||||
|
|
||||||
def canonicalTitle(title, lang=None):
|
def canonicalTitle(title, lang=None, imdbIndex=None):
|
||||||
"""Return the title in the canonic format 'Movie Title, The';
|
"""Return the title in the canonic format 'Movie Title, The';
|
||||||
beware that it doesn't handle long imdb titles, but only the
|
beware that it doesn't handle long imdb titles.
|
||||||
title portion, without year[/imdbIndex] or special markup.
|
|
||||||
The 'lang' argument can be used to specify the language of the title.
|
The 'lang' argument can be used to specify the language of the title.
|
||||||
"""
|
"""
|
||||||
isUnicode = isinstance(title, unicode)
|
isUnicode = isinstance(title, unicode)
|
||||||
|
@ -203,15 +202,19 @@ def canonicalTitle(title, lang=None):
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
if isUnicode:
|
if isUnicode:
|
||||||
_format = u'%s, %s'
|
_format = u'%s%s, %s'
|
||||||
else:
|
else:
|
||||||
_format = '%s, %s'
|
_format = '%s%s, %s'
|
||||||
ltitle = title.lower()
|
ltitle = title.lower()
|
||||||
|
if imdbIndex:
|
||||||
|
imdbIndex = ' (%s)' % imdbIndex
|
||||||
|
else:
|
||||||
|
imdbIndex = ''
|
||||||
spArticles = linguistics.spArticlesForLang(lang)
|
spArticles = linguistics.spArticlesForLang(lang)
|
||||||
for article in spArticles[isUnicode]:
|
for article in spArticles[isUnicode]:
|
||||||
if ltitle.startswith(article):
|
if ltitle.startswith(article):
|
||||||
lart = len(article)
|
lart = len(article)
|
||||||
title = _format % (title[lart:], title[:lart])
|
title = _format % (title[lart:], imdbIndex, title[:lart])
|
||||||
if article[-1] == ' ':
|
if article[-1] == ' ':
|
||||||
title = title[:-1]
|
title = title[:-1]
|
||||||
break
|
break
|
||||||
|
@ -383,18 +386,42 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
|
||||||
if title.endswith('(TV)'):
|
if title.endswith('(TV)'):
|
||||||
kind = u'tv movie'
|
kind = u'tv movie'
|
||||||
title = title[:-4].rstrip()
|
title = title[:-4].rstrip()
|
||||||
|
elif title.endswith('(TV Movie)'):
|
||||||
|
kind = u'tv movie'
|
||||||
|
title = title[:-10].rstrip()
|
||||||
elif title.endswith('(V)'):
|
elif title.endswith('(V)'):
|
||||||
kind = u'video movie'
|
kind = u'video movie'
|
||||||
title = title[:-3].rstrip()
|
title = title[:-3].rstrip()
|
||||||
elif title.endswith('(video)'):
|
elif title.lower().endswith('(video)'):
|
||||||
kind = u'video movie'
|
kind = u'video movie'
|
||||||
title = title[:-7].rstrip()
|
title = title[:-7].rstrip()
|
||||||
|
elif title.endswith('(TV Short)'):
|
||||||
|
kind = u'tv short'
|
||||||
|
title = title[:-10].rstrip()
|
||||||
|
elif title.endswith('(TV Mini-Series)'):
|
||||||
|
kind = u'tv mini series'
|
||||||
|
title = title[:-16].rstrip()
|
||||||
elif title.endswith('(mini)'):
|
elif title.endswith('(mini)'):
|
||||||
kind = u'tv mini series'
|
kind = u'tv mini series'
|
||||||
title = title[:-6].rstrip()
|
title = title[:-6].rstrip()
|
||||||
elif title.endswith('(VG)'):
|
elif title.endswith('(VG)'):
|
||||||
kind = u'video game'
|
kind = u'video game'
|
||||||
title = title[:-4].rstrip()
|
title = title[:-4].rstrip()
|
||||||
|
elif title.endswith('(Video Game)'):
|
||||||
|
kind = u'video game'
|
||||||
|
title = title[:-12].rstrip()
|
||||||
|
elif title.endswith('(TV Series)'):
|
||||||
|
epindex = title.find('(TV Episode) - ')
|
||||||
|
if epindex >= 0:
|
||||||
|
# It's an episode of a series.
|
||||||
|
kind = u'episode'
|
||||||
|
series_info = analyze_title(title[epindex + 15:])
|
||||||
|
result['episode of'] = series_info.get('title')
|
||||||
|
result['series year'] = series_info.get('year')
|
||||||
|
title = title[:epindex]
|
||||||
|
else:
|
||||||
|
kind = u'tv series'
|
||||||
|
title = title[:-11].rstrip()
|
||||||
# Search for the year and the optional imdbIndex (a roman number).
|
# Search for the year and the optional imdbIndex (a roman number).
|
||||||
yi = re_year_index.findall(title)
|
yi = re_year_index.findall(title)
|
||||||
if not yi:
|
if not yi:
|
||||||
|
@ -430,9 +457,6 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
|
||||||
if not kind:
|
if not kind:
|
||||||
kind = u'tv series'
|
kind = u'tv series'
|
||||||
title = title[1:-1].strip()
|
title = title[1:-1].strip()
|
||||||
elif title.endswith('(TV series)'):
|
|
||||||
kind = u'tv series'
|
|
||||||
title = title[:-11].rstrip()
|
|
||||||
if not title:
|
if not title:
|
||||||
raise IMDbParserError('invalid title: "%s"' % original_t)
|
raise IMDbParserError('invalid title: "%s"' % original_t)
|
||||||
if canonical is not None:
|
if canonical is not None:
|
||||||
|
@ -489,7 +513,7 @@ def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''):
|
||||||
|
|
||||||
def build_title(title_dict, canonical=None, canonicalSeries=None,
|
def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||||
canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
|
canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
|
||||||
_emptyString=u''):
|
_emptyString=u'', appendKind=True):
|
||||||
"""Given a dictionary that represents a "long" IMDb title,
|
"""Given a dictionary that represents a "long" IMDb title,
|
||||||
return a string.
|
return a string.
|
||||||
|
|
||||||
|
@ -511,6 +535,11 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||||
doYear = 0
|
doYear = 0
|
||||||
if ptdf:
|
if ptdf:
|
||||||
doYear = 1
|
doYear = 1
|
||||||
|
# XXX: for results coming from the new search page.
|
||||||
|
if not isinstance(episode_of, (dict, _Container)):
|
||||||
|
episode_of = {'title': episode_of, 'kind': 'tv series'}
|
||||||
|
if 'series year' in title_dict:
|
||||||
|
episode_of['year'] = title_dict['series year']
|
||||||
pre_title = build_title(episode_of, canonical=canonicalSeries,
|
pre_title = build_title(episode_of, canonical=canonicalSeries,
|
||||||
ptdf=0, _doYear=doYear,
|
ptdf=0, _doYear=doYear,
|
||||||
_emptyString=_emptyString)
|
_emptyString=_emptyString)
|
||||||
|
@ -545,12 +574,14 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||||
episode_title += '.%s' % episode
|
episode_title += '.%s' % episode
|
||||||
episode_title += ')'
|
episode_title += ')'
|
||||||
episode_title = '{%s}' % episode_title
|
episode_title = '{%s}' % episode_title
|
||||||
return '%s %s' % (pre_title, episode_title)
|
return _emptyString + '%s %s' % (_emptyString + pre_title,
|
||||||
|
_emptyString + episode_title)
|
||||||
title = title_dict.get('title', '')
|
title = title_dict.get('title', '')
|
||||||
|
imdbIndex = title_dict.get('imdbIndex', '')
|
||||||
if not title: return _emptyString
|
if not title: return _emptyString
|
||||||
if canonical is not None:
|
if canonical is not None:
|
||||||
if canonical:
|
if canonical:
|
||||||
title = canonicalTitle(title, lang=lang)
|
title = canonicalTitle(title, lang=lang, imdbIndex=imdbIndex)
|
||||||
else:
|
else:
|
||||||
title = normalizeTitle(title, lang=lang)
|
title = normalizeTitle(title, lang=lang)
|
||||||
if pre_title:
|
if pre_title:
|
||||||
|
@ -558,15 +589,20 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||||
if kind in (u'tv series', u'tv mini series'):
|
if kind in (u'tv series', u'tv mini series'):
|
||||||
title = '"%s"' % title
|
title = '"%s"' % title
|
||||||
if _doYear:
|
if _doYear:
|
||||||
imdbIndex = title_dict.get('imdbIndex')
|
year = title_dict.get('year') or '????'
|
||||||
year = title_dict.get('year') or u'????'
|
|
||||||
if isinstance(_emptyString, str):
|
if isinstance(_emptyString, str):
|
||||||
year = str(year)
|
year = str(year)
|
||||||
title += ' (%s' % year
|
imdbIndex = title_dict.get('imdbIndex')
|
||||||
if imdbIndex:
|
if not ptdf:
|
||||||
title += '/%s' % imdbIndex
|
if imdbIndex and (canonical is None or canonical):
|
||||||
title += ')'
|
title += ' (%s)' % imdbIndex
|
||||||
if kind:
|
title += ' (%s)' % year
|
||||||
|
else:
|
||||||
|
title += ' (%s' % year
|
||||||
|
if imdbIndex and (canonical is None or canonical):
|
||||||
|
title += '/%s' % imdbIndex
|
||||||
|
title += ')'
|
||||||
|
if appendKind and kind:
|
||||||
if kind == 'tv movie':
|
if kind == 'tv movie':
|
||||||
title += ' (TV)'
|
title += ' (TV)'
|
||||||
elif kind == 'video movie':
|
elif kind == 'video movie':
|
||||||
|
|
|
@ -11,6 +11,7 @@ __author__ = "dbr/Ben"
|
||||||
__version__ = "1.9"
|
__version__ = "1.9"
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
import getpass
|
import getpass
|
||||||
import StringIO
|
import StringIO
|
||||||
|
@ -18,8 +19,10 @@ import tempfile
|
||||||
import warnings
|
import warnings
|
||||||
import logging
|
import logging
|
||||||
import zipfile
|
import zipfile
|
||||||
|
import datetime as dt
|
||||||
import requests
|
import requests
|
||||||
import cachecontrol
|
import cachecontrol
|
||||||
|
import xmltodict
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import xml.etree.cElementTree as ElementTree
|
import xml.etree.cElementTree as ElementTree
|
||||||
|
@ -31,6 +34,7 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
gzip = None
|
gzip = None
|
||||||
|
|
||||||
|
from lib.dateutil.parser import parse
|
||||||
from cachecontrol import caches
|
from cachecontrol import caches
|
||||||
|
|
||||||
from tvdb_ui import BaseUI, ConsoleUI
|
from tvdb_ui import BaseUI, ConsoleUI
|
||||||
|
@ -560,44 +564,71 @@ class Tvdb:
|
||||||
except requests.Timeout, e:
|
except requests.Timeout, e:
|
||||||
raise tvdb_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
raise tvdb_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
||||||
|
|
||||||
if 'application/zip' in resp.headers.get("Content-Type", '') and resp.ok:
|
def process(path, key, value):
|
||||||
try:
|
key = key.lower()
|
||||||
# TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
|
|
||||||
log().debug("We recived a zip file unpacking now ...")
|
|
||||||
zipdata = StringIO.StringIO()
|
|
||||||
zipdata.write(resp.content)
|
|
||||||
myzipfile = zipfile.ZipFile(zipdata)
|
|
||||||
return myzipfile.read('%s.xml' % language)
|
|
||||||
except zipfile.BadZipfile:
|
|
||||||
raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
|
|
||||||
|
|
||||||
return resp.content if resp.ok else None
|
# clean up value and do type changes
|
||||||
|
if value:
|
||||||
|
try:
|
||||||
|
# convert to integer if needed
|
||||||
|
if value.isdigit():
|
||||||
|
value = int(value)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if key in ['banner', 'fanart', 'poster']:
|
||||||
|
value = self.config['url_artworkPrefix'] % (value)
|
||||||
|
else:
|
||||||
|
value = self._cleanData(value)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if key == 'firstaired' and value in "0000-00-00":
|
||||||
|
new_value = str(dt.date.fromordinal(1))
|
||||||
|
new_value = re.sub("([-]0{2}){1,}", "", new_value)
|
||||||
|
fixDate = parse(new_value, fuzzy=True).date()
|
||||||
|
value = fixDate.strftime("%Y-%m-%d")
|
||||||
|
elif key == 'firstaired':
|
||||||
|
value = parse(value, fuzzy=True).date()
|
||||||
|
value = value.strftime("%Y-%m-%d")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
value = self._cleanData(value)
|
||||||
|
return (key, value)
|
||||||
|
|
||||||
|
if resp.ok:
|
||||||
|
if 'application/zip' in resp.headers.get("Content-Type", ''):
|
||||||
|
try:
|
||||||
|
# TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
|
||||||
|
log().debug("We recived a zip file unpacking now ...")
|
||||||
|
zipdata = StringIO.StringIO()
|
||||||
|
zipdata.write(resp.content)
|
||||||
|
myzipfile = zipfile.ZipFile(zipdata)
|
||||||
|
return xmltodict.parse(myzipfile.read('%s.xml' % language), postprocessor=process)
|
||||||
|
except zipfile.BadZipfile:
|
||||||
|
raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
|
||||||
|
else:
|
||||||
|
return xmltodict.parse(resp.text.strip(), postprocessor=process)
|
||||||
|
|
||||||
def _getetsrc(self, url, params=None, language=None):
|
def _getetsrc(self, url, params=None, language=None):
|
||||||
"""Loads a URL using caching, returns an ElementTree of the source
|
"""Loads a URL using caching, returns an ElementTree of the source
|
||||||
"""
|
"""
|
||||||
src = self._loadUrl(url, params=params, language=language)
|
|
||||||
try:
|
try:
|
||||||
# TVDB doesn't sanitize \r (CR) from user input in some fields,
|
|
||||||
# remove it to avoid errors. Change from SickBeard, from will14m
|
|
||||||
return ElementTree.fromstring(src.rstrip("\r")) if src else None
|
|
||||||
except SyntaxError:
|
|
||||||
src = self._loadUrl(url, params=params, language=language)
|
src = self._loadUrl(url, params=params, language=language)
|
||||||
try:
|
src = [src[item] for item in src][0]
|
||||||
return ElementTree.fromstring(src.rstrip("\r")) if src else None
|
except:
|
||||||
except SyntaxError, exceptionmsg:
|
errormsg = "There was an error with the XML retrieved from thetvdb.com:"
|
||||||
errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % (
|
|
||||||
exceptionmsg
|
if self.config['cache_enabled']:
|
||||||
|
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
|
||||||
|
self.config['cache_location']
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.config['cache_enabled']:
|
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on"
|
||||||
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
|
errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
|
||||||
self.config['cache_location']
|
raise tvdb_error(errormsg)
|
||||||
)
|
|
||||||
|
|
||||||
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on"
|
return src
|
||||||
errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
|
|
||||||
raise tvdb_error(errormsg)
|
|
||||||
|
|
||||||
def _setItem(self, sid, seas, ep, attrib, value):
|
def _setItem(self, sid, seas, ep, attrib, value):
|
||||||
"""Creates a new episode, creating Show(), Season() and
|
"""Creates a new episode, creating Show(), Season() and
|
||||||
|
@ -649,9 +680,8 @@ class Tvdb:
|
||||||
log().debug("Searching for show %s" % series)
|
log().debug("Searching for show %s" % series)
|
||||||
self.config['params_getSeries']['seriesname'] = series
|
self.config['params_getSeries']['seriesname'] = series
|
||||||
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
|
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
|
||||||
allSeries = list(dict((s.tag.lower(), s.text) for s in x.getchildren()) for x in seriesEt)
|
|
||||||
|
|
||||||
return allSeries
|
return [seriesEt[item] for item in seriesEt][0]
|
||||||
|
|
||||||
def _getSeries(self, series):
|
def _getSeries(self, series):
|
||||||
"""This searches TheTVDB.com for the series name,
|
"""This searches TheTVDB.com for the series name,
|
||||||
|
@ -798,24 +828,13 @@ class Tvdb:
|
||||||
self.config['url_seriesInfo'] % (sid, getShowInLanguage)
|
self.config['url_seriesInfo'] % (sid, getShowInLanguage)
|
||||||
)
|
)
|
||||||
|
|
||||||
if seriesInfoEt is None: return False
|
# check and make sure we have data to process and that it contains a series name
|
||||||
for curInfo in seriesInfoEt.findall("Series")[0]:
|
if seriesInfoEt is None or 'seriesname' not in seriesInfoEt['series']:
|
||||||
tag = curInfo.tag.lower()
|
return False
|
||||||
value = curInfo.text
|
|
||||||
|
|
||||||
if tag == 'seriesname' and value is None:
|
for k, v in seriesInfoEt['series'].items():
|
||||||
return False
|
self._setShowData(sid, k, v)
|
||||||
|
|
||||||
if value is not None:
|
|
||||||
if tag == 'id':
|
|
||||||
value = int(value)
|
|
||||||
|
|
||||||
if tag in ['banner', 'fanart', 'poster']:
|
|
||||||
value = self.config['url_artworkPrefix'] % (value)
|
|
||||||
else:
|
|
||||||
value = self._cleanData(value)
|
|
||||||
|
|
||||||
self._setShowData(sid, tag, value)
|
|
||||||
if seriesSearch:
|
if seriesSearch:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -837,63 +856,40 @@ class Tvdb:
|
||||||
|
|
||||||
epsEt = self._getetsrc(url, language=language)
|
epsEt = self._getetsrc(url, language=language)
|
||||||
|
|
||||||
for cur_ep in epsEt.findall("Episode"):
|
for cur_ep in epsEt["episode"]:
|
||||||
|
|
||||||
if self.config['dvdorder']:
|
if self.config['dvdorder']:
|
||||||
log().debug('Using DVD ordering.')
|
log().debug('Using DVD ordering.')
|
||||||
use_dvd = cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None
|
use_dvd = cur_ep['dvd_season'] != None and cur_ep['dvd_episodenumber'] != None
|
||||||
else:
|
else:
|
||||||
use_dvd = False
|
use_dvd = False
|
||||||
|
|
||||||
if use_dvd:
|
if use_dvd:
|
||||||
elem_seasnum, elem_epno = cur_ep.find('DVD_season'), cur_ep.find('DVD_episodenumber')
|
seasnum, epno = cur_ep['dvd_season'], cur_ep['dvd_episodenumber']
|
||||||
else:
|
else:
|
||||||
elem_seasnum, elem_epno = cur_ep.find('SeasonNumber'), cur_ep.find('EpisodeNumber')
|
seasnum, epno = cur_ep['seasonnumber'], cur_ep['episodenumber']
|
||||||
|
|
||||||
if elem_seasnum is None or elem_epno is None:
|
|
||||||
|
|
||||||
|
if seasnum is None or epno is None:
|
||||||
log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
|
log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
|
||||||
elem_seasnum, elem_epno))
|
seasnum, epno))
|
||||||
log().debug(
|
|
||||||
" ".join(
|
|
||||||
"%r is %r" % (child.tag, child.text) for child in cur_ep.getchildren()))
|
|
||||||
# TODO: Should this happen?
|
|
||||||
continue # Skip to next episode
|
continue # Skip to next episode
|
||||||
|
|
||||||
|
|
||||||
# float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
|
# float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
|
||||||
seas_no = int(float(elem_seasnum.text))
|
seas_no = int(float(seasnum))
|
||||||
ep_no = int(float(elem_epno.text))
|
ep_no = int(float(epno))
|
||||||
|
|
||||||
useDVD = False
|
for k,v in cur_ep.items():
|
||||||
|
k = k.lower()
|
||||||
|
|
||||||
if (self.config['dvdorder']):
|
if v is not None:
|
||||||
log().debug('DVD Order? Yes')
|
if k == 'id':
|
||||||
useDVD = (cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None)
|
v = int(v)
|
||||||
else:
|
|
||||||
log().debug('DVD Order? No')
|
|
||||||
|
|
||||||
if (useDVD):
|
if k == 'filename':
|
||||||
log().debug('Use DVD Order? Yes')
|
v = self.config['url_artworkPrefix'] % (v)
|
||||||
seas_no = int(cur_ep.find('DVD_season').text)
|
|
||||||
ep_no = int(float(cur_ep.find('DVD_episodenumber').text))
|
|
||||||
else:
|
|
||||||
log().debug('Use DVD Order? No')
|
|
||||||
seas_no = int(cur_ep.find('SeasonNumber').text)
|
|
||||||
ep_no = int(cur_ep.find('EpisodeNumber').text)
|
|
||||||
|
|
||||||
for cur_item in cur_ep.getchildren():
|
|
||||||
tag = cur_item.tag.lower()
|
|
||||||
value = cur_item.text
|
|
||||||
if value is not None:
|
|
||||||
if tag == 'id':
|
|
||||||
value = int(value)
|
|
||||||
|
|
||||||
if tag == 'filename':
|
|
||||||
value = self.config['url_artworkPrefix'] % (value)
|
|
||||||
else:
|
else:
|
||||||
value = self._cleanData(value)
|
v = self._cleanData(v)
|
||||||
self._setItem(sid, seas_no, ep_no, tag, value)
|
|
||||||
|
self._setItem(sid, seas_no, ep_no, k, v)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python2
|
# !/usr/bin/env python2
|
||||||
#encoding:utf-8
|
#encoding:utf-8
|
||||||
#author:echel0n
|
#author:echel0n
|
||||||
#project:tvrage_api
|
#project:tvrage_api
|
||||||
|
@ -24,6 +24,7 @@ import logging
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
import requests
|
import requests
|
||||||
import cachecontrol
|
import cachecontrol
|
||||||
|
import xmltodict
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import xml.etree.cElementTree as ElementTree
|
import xml.etree.cElementTree as ElementTree
|
||||||
|
@ -35,11 +36,13 @@ from cachecontrol import caches
|
||||||
|
|
||||||
from tvrage_ui import BaseUI
|
from tvrage_ui import BaseUI
|
||||||
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
|
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
|
||||||
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
|
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
|
||||||
|
|
||||||
|
|
||||||
def log():
|
def log():
|
||||||
return logging.getLogger("tvrage_api")
|
return logging.getLogger("tvrage_api")
|
||||||
|
|
||||||
|
|
||||||
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
|
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
|
||||||
"""Retry calling the decorated function using an exponential backoff.
|
"""Retry calling the decorated function using an exponential backoff.
|
||||||
|
|
||||||
|
@ -83,6 +86,7 @@ def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
|
||||||
|
|
||||||
return deco_retry
|
return deco_retry
|
||||||
|
|
||||||
|
|
||||||
class ShowContainer(dict):
|
class ShowContainer(dict):
|
||||||
"""Simple dict that holds a series of Show instances
|
"""Simple dict that holds a series of Show instances
|
||||||
"""
|
"""
|
||||||
|
@ -105,13 +109,14 @@ class ShowContainer(dict):
|
||||||
|
|
||||||
_lastgc = time.time()
|
_lastgc = time.time()
|
||||||
del tbd
|
del tbd
|
||||||
|
|
||||||
super(ShowContainer, self).__setitem__(key, value)
|
super(ShowContainer, self).__setitem__(key, value)
|
||||||
|
|
||||||
|
|
||||||
class Show(dict):
|
class Show(dict):
|
||||||
"""Holds a dict of seasons, and show data.
|
"""Holds a dict of seasons, and show data.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
dict.__init__(self)
|
dict.__init__(self)
|
||||||
self.data = {}
|
self.data = {}
|
||||||
|
@ -157,7 +162,7 @@ class Show(dict):
|
||||||
raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
|
raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def search(self, term = None, key = None):
|
def search(self, term=None, key=None):
|
||||||
"""
|
"""
|
||||||
Search all episodes in show. Can search all data, or a specific key (for
|
Search all episodes in show. Can search all data, or a specific key (for
|
||||||
example, episodename)
|
example, episodename)
|
||||||
|
@ -173,7 +178,7 @@ class Show(dict):
|
||||||
"""
|
"""
|
||||||
results = []
|
results = []
|
||||||
for cur_season in self.values():
|
for cur_season in self.values():
|
||||||
searchresult = cur_season.search(term = term, key = key)
|
searchresult = cur_season.search(term=term, key=key)
|
||||||
if len(searchresult) != 0:
|
if len(searchresult) != 0:
|
||||||
results.extend(searchresult)
|
results.extend(searchresult)
|
||||||
|
|
||||||
|
@ -181,7 +186,7 @@ class Show(dict):
|
||||||
|
|
||||||
|
|
||||||
class Season(dict):
|
class Season(dict):
|
||||||
def __init__(self, show = None):
|
def __init__(self, show=None):
|
||||||
"""The show attribute points to the parent show
|
"""The show attribute points to the parent show
|
||||||
"""
|
"""
|
||||||
self.show = show
|
self.show = show
|
||||||
|
@ -202,13 +207,13 @@ class Season(dict):
|
||||||
else:
|
else:
|
||||||
return dict.__getitem__(self, episode_number)
|
return dict.__getitem__(self, episode_number)
|
||||||
|
|
||||||
def search(self, term = None, key = None):
|
def search(self, term=None, key=None):
|
||||||
"""Search all episodes in season, returns a list of matching Episode
|
"""Search all episodes in season, returns a list of matching Episode
|
||||||
instances.
|
instances.
|
||||||
"""
|
"""
|
||||||
results = []
|
results = []
|
||||||
for ep in self.values():
|
for ep in self.values():
|
||||||
searchresult = ep.search(term = term, key = key)
|
searchresult = ep.search(term=term, key=key)
|
||||||
if searchresult is not None:
|
if searchresult is not None:
|
||||||
results.append(
|
results.append(
|
||||||
searchresult
|
searchresult
|
||||||
|
@ -217,7 +222,7 @@ class Season(dict):
|
||||||
|
|
||||||
|
|
||||||
class Episode(dict):
|
class Episode(dict):
|
||||||
def __init__(self, season = None):
|
def __init__(self, season=None):
|
||||||
"""The season attribute points to the parent season
|
"""The season attribute points to the parent season
|
||||||
"""
|
"""
|
||||||
self.season = season
|
self.season = season
|
||||||
|
@ -242,7 +247,7 @@ class Episode(dict):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))
|
raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))
|
||||||
|
|
||||||
def search(self, term = None, key = None):
|
def search(self, term=None, key=None):
|
||||||
"""Search episode data for term, if it matches, return the Episode (self).
|
"""Search episode data for term, if it matches, return the Episode (self).
|
||||||
The key parameter can be used to limit the search to a specific element,
|
The key parameter can be used to limit the search to a specific element,
|
||||||
for example, episodename.
|
for example, episodename.
|
||||||
|
@ -258,25 +263,27 @@ class Episode(dict):
|
||||||
if key is not None and cur_key != key:
|
if key is not None and cur_key != key:
|
||||||
# Do not search this key
|
# Do not search this key
|
||||||
continue
|
continue
|
||||||
if cur_value.find( unicode(term).lower() ) > -1:
|
if cur_value.find(unicode(term).lower()) > -1:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
class TVRage:
|
class TVRage:
|
||||||
"""Create easy-to-use interface to name of season/episode name"""
|
"""Create easy-to-use interface to name of season/episode name"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
interactive = False,
|
interactive=False,
|
||||||
select_first = False,
|
select_first=False,
|
||||||
debug = False,
|
debug=False,
|
||||||
cache = True,
|
cache=True,
|
||||||
banners = False,
|
banners=False,
|
||||||
actors = False,
|
actors=False,
|
||||||
custom_ui = None,
|
custom_ui=None,
|
||||||
language = None,
|
language=None,
|
||||||
search_all_languages = False,
|
search_all_languages=False,
|
||||||
apikey = None,
|
apikey=None,
|
||||||
forceConnect=False,
|
forceConnect=False,
|
||||||
useZip=False,
|
useZip=False,
|
||||||
dvdorder=False):
|
dvdorder=False):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
cache (True/False/str/unicode/urllib2 opener):
|
cache (True/False/str/unicode/urllib2 opener):
|
||||||
|
@ -294,18 +301,18 @@ class TVRage:
|
||||||
return an exception immediately.
|
return an exception immediately.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.shows = ShowContainer() # Holds all Show classes
|
self.shows = ShowContainer() # Holds all Show classes
|
||||||
self.corrections = {} # Holds show-name to show_id mapping
|
self.corrections = {} # Holds show-name to show_id mapping
|
||||||
self.sess = requests.session() # HTTP Session
|
self.sess = requests.session() # HTTP Session
|
||||||
|
|
||||||
self.config = {}
|
self.config = {}
|
||||||
|
|
||||||
if apikey is not None:
|
if apikey is not None:
|
||||||
self.config['apikey'] = apikey
|
self.config['apikey'] = apikey
|
||||||
else:
|
else:
|
||||||
self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt" # tvdb_api's API key
|
self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt" # tvdb_api's API key
|
||||||
|
|
||||||
self.config['debug_enabled'] = debug # show debugging messages
|
self.config['debug_enabled'] = debug # show debugging messages
|
||||||
|
|
||||||
self.config['custom_ui'] = custom_ui
|
self.config['custom_ui'] = custom_ui
|
||||||
|
|
||||||
|
@ -322,8 +329,8 @@ class TVRage:
|
||||||
|
|
||||||
if self.config['debug_enabled']:
|
if self.config['debug_enabled']:
|
||||||
warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. "
|
warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. "
|
||||||
"To enable debug messages, use the following code before importing: "
|
"To enable debug messages, use the following code before importing: "
|
||||||
"import logging; logging.basicConfig(level=logging.DEBUG)")
|
"import logging; logging.basicConfig(level=logging.DEBUG)")
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
@ -331,8 +338,8 @@ class TVRage:
|
||||||
# Hard-coded here as it is realtively static, and saves another HTTP request, as
|
# Hard-coded here as it is realtively static, and saves another HTTP request, as
|
||||||
# recommended on http://tvrage.com/wiki/index.php/API:languages.xml
|
# recommended on http://tvrage.com/wiki/index.php/API:languages.xml
|
||||||
self.config['valid_languages'] = [
|
self.config['valid_languages'] = [
|
||||||
"da", "fi", "nl", "de", "it", "es", "fr","pl", "hu","el","tr",
|
"da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
|
||||||
"ru","he","ja","pt","zh","cs","sl", "hr","ko","en","sv","no"
|
"ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv", "no"
|
||||||
]
|
]
|
||||||
|
|
||||||
# tvrage.com should be based around numeric language codes,
|
# tvrage.com should be based around numeric language codes,
|
||||||
|
@ -340,9 +347,9 @@ class TVRage:
|
||||||
# requires the language ID, thus this mapping is required (mainly
|
# requires the language ID, thus this mapping is required (mainly
|
||||||
# for usage in tvrage_ui - internally tvrage_api will use the language abbreviations)
|
# for usage in tvrage_ui - internally tvrage_api will use the language abbreviations)
|
||||||
self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
|
self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
|
||||||
'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
|
'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
|
||||||
'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
|
'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
|
||||||
'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}
|
'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}
|
||||||
|
|
||||||
if language is None:
|
if language is None:
|
||||||
self.config['language'] = 'en'
|
self.config['language'] = 'en'
|
||||||
|
@ -390,9 +397,9 @@ class TVRage:
|
||||||
|
|
||||||
# get response from TVRage
|
# get response from TVRage
|
||||||
if self.config['cache_enabled']:
|
if self.config['cache_enabled']:
|
||||||
resp = self.sess.get(url, cache_auto=True, params=params)
|
resp = self.sess.get(url.strip(), cache_auto=True, params=params)
|
||||||
else:
|
else:
|
||||||
resp = requests.get(url, params=params)
|
resp = requests.get(url.strip(), params=params)
|
||||||
|
|
||||||
except requests.HTTPError, e:
|
except requests.HTTPError, e:
|
||||||
raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
|
raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
|
||||||
|
@ -403,81 +410,84 @@ class TVRage:
|
||||||
except requests.Timeout, e:
|
except requests.Timeout, e:
|
||||||
raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
||||||
|
|
||||||
return resp.content if resp.ok else None
|
def remap_keys(path, key, value):
|
||||||
|
name_map = {
|
||||||
|
'showid': 'id',
|
||||||
|
'showname': 'seriesname',
|
||||||
|
'name': 'seriesname',
|
||||||
|
'summary': 'overview',
|
||||||
|
'started': 'firstaired',
|
||||||
|
'genres': 'genre',
|
||||||
|
'airtime': 'airs_time',
|
||||||
|
'airday': 'airs_dayofweek',
|
||||||
|
'image': 'fanart',
|
||||||
|
'epnum': 'absolute_number',
|
||||||
|
'title': 'episodename',
|
||||||
|
'airdate': 'firstaired',
|
||||||
|
'screencap': 'filename',
|
||||||
|
'seasonnum': 'episodenumber'
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
key = name_map[key.lower()]
|
||||||
|
except (ValueError, TypeError, KeyError):
|
||||||
|
key.lower()
|
||||||
|
|
||||||
|
# clean up value and do type changes
|
||||||
|
if value:
|
||||||
|
if isinstance(value, dict):
|
||||||
|
if key == 'network':
|
||||||
|
value = value['#text']
|
||||||
|
if key == 'genre':
|
||||||
|
value = value['genre']
|
||||||
|
if not isinstance(value, list):
|
||||||
|
value = [value]
|
||||||
|
value = '|' + '|'.join(value) + '|'
|
||||||
|
|
||||||
|
try:
|
||||||
|
# convert to integer if needed
|
||||||
|
if value.isdigit():
|
||||||
|
value = int(value)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
if key == 'firstaired' and value in "0000-00-00":
|
||||||
|
new_value = str(dt.date.fromordinal(1))
|
||||||
|
new_value = re.sub("([-]0{2}){1,}", "", new_value)
|
||||||
|
fixDate = parse(new_value, fuzzy=True).date()
|
||||||
|
value = fixDate.strftime("%Y-%m-%d")
|
||||||
|
elif key == 'firstaired':
|
||||||
|
value = parse(value, fuzzy=True).date()
|
||||||
|
value = value.strftime("%Y-%m-%d")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
value = self._cleanData(value)
|
||||||
|
return (key, value)
|
||||||
|
|
||||||
|
if resp.ok:
|
||||||
|
return xmltodict.parse(resp.text.strip(), postprocessor=remap_keys)
|
||||||
|
|
||||||
def _getetsrc(self, url, params=None):
|
def _getetsrc(self, url, params=None):
|
||||||
"""Loads a URL using caching, returns an ElementTree of the source
|
"""Loads a URL using caching, returns an ElementTree of the source
|
||||||
"""
|
"""
|
||||||
reDict = {
|
|
||||||
'showid': 'id',
|
|
||||||
'showname': 'seriesname',
|
|
||||||
'name': 'seriesname',
|
|
||||||
'summary': 'overview',
|
|
||||||
'started': 'firstaired',
|
|
||||||
'genres': 'genre',
|
|
||||||
'airtime': 'airs_time',
|
|
||||||
'airday': 'airs_dayofweek',
|
|
||||||
'image': 'fanart',
|
|
||||||
'epnum': 'absolute_number',
|
|
||||||
'title': 'episodename',
|
|
||||||
'airdate': 'firstaired',
|
|
||||||
'screencap': 'filename',
|
|
||||||
'seasonnum': 'episodenumber',
|
|
||||||
}
|
|
||||||
|
|
||||||
robj = re.compile('|'.join(reDict.keys()))
|
|
||||||
src = self._loadUrl(url, params)
|
|
||||||
try:
|
try:
|
||||||
# TVRAGE doesn't sanitize \r (CR) from user input in some fields,
|
|
||||||
# remove it to avoid errors. Change from SickBeard, from will14m
|
|
||||||
xml = ElementTree.fromstring(src.rstrip("\r"))
|
|
||||||
tree = ElementTree.ElementTree(xml)
|
|
||||||
for elm in tree.findall('.//*'):
|
|
||||||
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
|
|
||||||
|
|
||||||
if elm.tag in 'firstaired':
|
|
||||||
try:
|
|
||||||
if elm.text in "0000-00-00":
|
|
||||||
elm.text = str(dt.date.fromordinal(1))
|
|
||||||
elm.text = re.sub("([-]0{2}){1,}", "", elm.text)
|
|
||||||
fixDate = parse(elm.text, fuzzy=True).date()
|
|
||||||
elm.text = fixDate.strftime("%Y-%m-%d")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return ElementTree.fromstring(ElementTree.tostring(xml))
|
|
||||||
except SyntaxError:
|
|
||||||
src = self._loadUrl(url, params)
|
src = self._loadUrl(url, params)
|
||||||
try:
|
src = [src[item] for item in src][0]
|
||||||
xml = ElementTree.fromstring(src.rstrip("\r"))
|
except:
|
||||||
tree = ElementTree.ElementTree(xml)
|
errormsg = "There was an error with the XML retrieved from tvrage.com"
|
||||||
for elm in tree.findall('.//*'):
|
|
||||||
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
|
|
||||||
|
|
||||||
if elm.tag in 'firstaired' and elm.text:
|
if self.config['cache_enabled']:
|
||||||
if elm.text == "0000-00-00":
|
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
|
||||||
elm.text = str(dt.date.fromordinal(1))
|
self.config['cache_location']
|
||||||
try:
|
|
||||||
#month = strptime(match.group('air_month')[:3],'%b').tm_mon
|
|
||||||
#day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
|
|
||||||
#dtStr = '%s/%s/%s' % (year, month, day)
|
|
||||||
|
|
||||||
fixDate = parse(elm.text, fuzzy=True)
|
|
||||||
elm.text = fixDate.strftime("%Y-%m-%d")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return ElementTree.fromstring(ElementTree.tostring(xml))
|
|
||||||
except SyntaxError, exceptionmsg:
|
|
||||||
errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % (
|
|
||||||
exceptionmsg
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.config['cache_enabled']:
|
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
|
||||||
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
|
raise tvrage_error(errormsg)
|
||||||
self.config['cache_location']
|
|
||||||
)
|
|
||||||
|
|
||||||
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
|
return src
|
||||||
raise tvrage_error(errormsg)
|
|
||||||
|
|
||||||
def _setItem(self, sid, seas, ep, attrib, value):
|
def _setItem(self, sid, seas, ep, attrib, value):
|
||||||
"""Creates a new episode, creating Show(), Season() and
|
"""Creates a new episode, creating Show(), Season() and
|
||||||
|
@ -497,9 +507,9 @@ class TVRage:
|
||||||
if sid not in self.shows:
|
if sid not in self.shows:
|
||||||
self.shows[sid] = Show()
|
self.shows[sid] = Show()
|
||||||
if seas not in self.shows[sid]:
|
if seas not in self.shows[sid]:
|
||||||
self.shows[sid][seas] = Season(show = self.shows[sid])
|
self.shows[sid][seas] = Season(show=self.shows[sid])
|
||||||
if ep not in self.shows[sid][seas]:
|
if ep not in self.shows[sid][seas]:
|
||||||
self.shows[sid][seas][ep] = Episode(season = self.shows[sid][seas])
|
self.shows[sid][seas][ep] = Episode(season=self.shows[sid][seas])
|
||||||
self.shows[sid][seas][ep][attrib] = value
|
self.shows[sid][seas][ep][attrib] = value
|
||||||
|
|
||||||
def _setShowData(self, sid, key, value):
|
def _setShowData(self, sid, key, value):
|
||||||
|
@ -529,9 +539,8 @@ class TVRage:
|
||||||
log().debug("Searching for show %s" % series)
|
log().debug("Searching for show %s" % series)
|
||||||
self.config['params_getSeries']['show'] = series
|
self.config['params_getSeries']['show'] = series
|
||||||
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
|
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
|
||||||
allSeries = list(dict((s.tag.lower(),s.text) for s in x.getchildren()) for x in seriesEt)
|
|
||||||
|
|
||||||
return allSeries
|
return [seriesEt[item] for item in seriesEt][0]
|
||||||
|
|
||||||
def _getSeries(self, series):
|
def _getSeries(self, series):
|
||||||
"""This searches tvrage.com for the series name,
|
"""This searches tvrage.com for the series name,
|
||||||
|
@ -547,10 +556,10 @@ class TVRage:
|
||||||
|
|
||||||
if self.config['custom_ui'] is not None:
|
if self.config['custom_ui'] is not None:
|
||||||
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
|
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
|
||||||
ui = self.config['custom_ui'](config = self.config)
|
ui = self.config['custom_ui'](config=self.config)
|
||||||
else:
|
else:
|
||||||
log().debug('Auto-selecting first search result using BaseUI')
|
log().debug('Auto-selecting first search result using BaseUI')
|
||||||
ui = BaseUI(config = self.config)
|
ui = BaseUI(config=self.config)
|
||||||
|
|
||||||
return ui.selectSeries(allSeries)
|
return ui.selectSeries(allSeries)
|
||||||
|
|
||||||
|
@ -568,62 +577,49 @@ class TVRage:
|
||||||
self.config['params_seriesInfo']
|
self.config['params_seriesInfo']
|
||||||
)
|
)
|
||||||
|
|
||||||
if seriesInfoEt is None: return False
|
# check and make sure we have data to process and that it contains a series name
|
||||||
for curInfo in seriesInfoEt:
|
if seriesInfoEt is None or 'seriesname' not in seriesInfoEt:
|
||||||
tag = curInfo.tag.lower()
|
return False
|
||||||
value = curInfo.text
|
|
||||||
|
|
||||||
if tag == 'seriesname' and value is None:
|
for k, v in seriesInfoEt.items():
|
||||||
return False
|
self._setShowData(sid, k, v)
|
||||||
|
|
||||||
if tag == 'id':
|
# series search ends here
|
||||||
value = int(value)
|
if seriesSearch:
|
||||||
|
return True
|
||||||
if value is not None:
|
|
||||||
value = self._cleanData(value)
|
|
||||||
|
|
||||||
self._setShowData(sid, tag, value)
|
|
||||||
if seriesSearch: return True
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Parse genre data
|
|
||||||
log().debug('Getting genres of %s' % (sid))
|
|
||||||
for genre in seriesInfoEt.find('genres'):
|
|
||||||
tag = genre.tag.lower()
|
|
||||||
|
|
||||||
value = genre.text
|
|
||||||
if value is not None:
|
|
||||||
value = self._cleanData(value)
|
|
||||||
|
|
||||||
self._setShowData(sid, tag, value)
|
|
||||||
except Exception:
|
|
||||||
log().debug('No genres for %s' % (sid))
|
|
||||||
|
|
||||||
# Parse episode data
|
# Parse episode data
|
||||||
log().debug('Getting all episodes of %s' % (sid))
|
log().debug('Getting all episodes of %s' % (sid))
|
||||||
|
|
||||||
self.config['params_epInfo']['sid'] = sid
|
self.config['params_epInfo']['sid'] = sid
|
||||||
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
|
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
|
||||||
for cur_list in epsEt.findall("Episodelist"):
|
|
||||||
for cur_seas in cur_list:
|
|
||||||
try:
|
|
||||||
seas_no = int(cur_seas.attrib['no'])
|
|
||||||
for cur_ep in cur_seas:
|
|
||||||
ep_no = int(cur_ep.find('episodenumber').text)
|
|
||||||
self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
|
|
||||||
for cur_item in cur_ep:
|
|
||||||
tag = cur_item.tag.lower()
|
|
||||||
|
|
||||||
value = cur_item.text
|
for season in epsEt['Episodelist']['Season']:
|
||||||
if value is not None:
|
episodes = season['episode']
|
||||||
if tag == 'id':
|
if not isinstance(episodes, list):
|
||||||
value = int(value)
|
episodes = [episodes]
|
||||||
|
|
||||||
value = self._cleanData(value)
|
for episode in episodes:
|
||||||
|
seas_no = int(season['@no'])
|
||||||
|
ep_no = int(episode['episodenumber'])
|
||||||
|
self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
|
||||||
|
|
||||||
self._setItem(sid, seas_no, ep_no, tag, value)
|
for k,v in episode.items():
|
||||||
except:
|
try:
|
||||||
continue
|
k = k.lower()
|
||||||
|
if v is not None:
|
||||||
|
if k == 'link':
|
||||||
|
v = v.rsplit('/', 1)[1]
|
||||||
|
k = 'id'
|
||||||
|
|
||||||
|
if k == 'id':
|
||||||
|
v = int(v)
|
||||||
|
|
||||||
|
v = self._cleanData(v)
|
||||||
|
|
||||||
|
self._setItem(sid, seas_no, ep_no, k, v)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _nameToSid(self, name):
|
def _nameToSid(self, name):
|
||||||
|
@ -632,7 +628,7 @@ class TVRage:
|
||||||
the correct SID.
|
the correct SID.
|
||||||
"""
|
"""
|
||||||
if name in self.corrections:
|
if name in self.corrections:
|
||||||
log().debug('Correcting %s to %s' % (name, self.corrections[name]) )
|
log().debug('Correcting %s to %s' % (name, self.corrections[name]))
|
||||||
return self.corrections[name]
|
return self.corrections[name]
|
||||||
else:
|
else:
|
||||||
log().debug('Getting show %s' % (name))
|
log().debug('Getting show %s' % (name))
|
||||||
|
@ -673,11 +669,13 @@ def main():
|
||||||
grabs an episode name interactively.
|
grabs an episode name interactively.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
tvrage_instance = TVRage(cache=False)
|
tvrage_instance = TVRage(cache=False)
|
||||||
print tvrage_instance['Lost']['seriesname']
|
print tvrage_instance['Lost']['seriesname']
|
||||||
print tvrage_instance['Lost'][1][4]['episodename']
|
print tvrage_instance['Lost'][1][4]['episodename']
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -0,0 +1,359 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
"Makes working with XML feel like you are working with JSON"
|
||||||
|
|
||||||
|
from xml.parsers import expat
|
||||||
|
from xml.sax.saxutils import XMLGenerator
|
||||||
|
from xml.sax.xmlreader import AttributesImpl
|
||||||
|
try: # pragma no cover
|
||||||
|
from cStringIO import StringIO
|
||||||
|
except ImportError: # pragma no cover
|
||||||
|
try:
|
||||||
|
from StringIO import StringIO
|
||||||
|
except ImportError:
|
||||||
|
from io import StringIO
|
||||||
|
try: # pragma no cover
|
||||||
|
from collections import OrderedDict
|
||||||
|
except ImportError: # pragma no cover
|
||||||
|
try:
|
||||||
|
from ordereddict import OrderedDict
|
||||||
|
except ImportError:
|
||||||
|
OrderedDict = dict
|
||||||
|
|
||||||
|
try: # pragma no cover
|
||||||
|
_basestring = basestring
|
||||||
|
except NameError: # pragma no cover
|
||||||
|
_basestring = str
|
||||||
|
try: # pragma no cover
|
||||||
|
_unicode = unicode
|
||||||
|
except NameError: # pragma no cover
|
||||||
|
_unicode = str
|
||||||
|
|
||||||
|
__author__ = 'Martin Blech'
|
||||||
|
__version__ = '0.9.0'
|
||||||
|
__license__ = 'MIT'
|
||||||
|
|
||||||
|
|
||||||
|
class ParsingInterrupted(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class _DictSAXHandler(object):
|
||||||
|
def __init__(self,
|
||||||
|
item_depth=0,
|
||||||
|
item_callback=lambda *args: True,
|
||||||
|
xml_attribs=True,
|
||||||
|
attr_prefix='@',
|
||||||
|
cdata_key='#text',
|
||||||
|
force_cdata=False,
|
||||||
|
cdata_separator='',
|
||||||
|
postprocessor=None,
|
||||||
|
dict_constructor=OrderedDict,
|
||||||
|
strip_whitespace=True,
|
||||||
|
namespace_separator=':',
|
||||||
|
namespaces=None):
|
||||||
|
self.path = []
|
||||||
|
self.stack = []
|
||||||
|
self.data = None
|
||||||
|
self.item = None
|
||||||
|
self.item_depth = item_depth
|
||||||
|
self.xml_attribs = xml_attribs
|
||||||
|
self.item_callback = item_callback
|
||||||
|
self.attr_prefix = attr_prefix
|
||||||
|
self.cdata_key = cdata_key
|
||||||
|
self.force_cdata = force_cdata
|
||||||
|
self.cdata_separator = cdata_separator
|
||||||
|
self.postprocessor = postprocessor
|
||||||
|
self.dict_constructor = dict_constructor
|
||||||
|
self.strip_whitespace = strip_whitespace
|
||||||
|
self.namespace_separator = namespace_separator
|
||||||
|
self.namespaces = namespaces
|
||||||
|
|
||||||
|
def _build_name(self, full_name):
|
||||||
|
if not self.namespaces:
|
||||||
|
return full_name
|
||||||
|
i = full_name.rfind(self.namespace_separator)
|
||||||
|
if i == -1:
|
||||||
|
return full_name
|
||||||
|
namespace, name = full_name[:i], full_name[i+1:]
|
||||||
|
short_namespace = self.namespaces.get(namespace, namespace)
|
||||||
|
if not short_namespace:
|
||||||
|
return name
|
||||||
|
else:
|
||||||
|
return self.namespace_separator.join((short_namespace, name))
|
||||||
|
|
||||||
|
def _attrs_to_dict(self, attrs):
|
||||||
|
if isinstance(attrs, dict):
|
||||||
|
return attrs
|
||||||
|
return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))
|
||||||
|
|
||||||
|
def startElement(self, full_name, attrs):
|
||||||
|
name = self._build_name(full_name)
|
||||||
|
attrs = self._attrs_to_dict(attrs)
|
||||||
|
self.path.append((name, attrs or None))
|
||||||
|
if len(self.path) > self.item_depth:
|
||||||
|
self.stack.append((self.item, self.data))
|
||||||
|
if self.xml_attribs:
|
||||||
|
attrs = self.dict_constructor(
|
||||||
|
(self.attr_prefix+key, value)
|
||||||
|
for (key, value) in attrs.items())
|
||||||
|
else:
|
||||||
|
attrs = None
|
||||||
|
self.item = attrs or None
|
||||||
|
self.data = None
|
||||||
|
|
||||||
|
def endElement(self, full_name):
|
||||||
|
name = self._build_name(full_name)
|
||||||
|
if len(self.path) == self.item_depth:
|
||||||
|
item = self.item
|
||||||
|
if item is None:
|
||||||
|
item = self.data
|
||||||
|
should_continue = self.item_callback(self.path, item)
|
||||||
|
if not should_continue:
|
||||||
|
raise ParsingInterrupted()
|
||||||
|
if len(self.stack):
|
||||||
|
item, data = self.item, self.data
|
||||||
|
self.item, self.data = self.stack.pop()
|
||||||
|
if self.strip_whitespace and data is not None:
|
||||||
|
data = data.strip() or None
|
||||||
|
if data and self.force_cdata and item is None:
|
||||||
|
item = self.dict_constructor()
|
||||||
|
if item is not None:
|
||||||
|
if data:
|
||||||
|
self.push_data(item, self.cdata_key, data)
|
||||||
|
self.item = self.push_data(self.item, name, item)
|
||||||
|
else:
|
||||||
|
self.item = self.push_data(self.item, name, data)
|
||||||
|
else:
|
||||||
|
self.item = self.data = None
|
||||||
|
self.path.pop()
|
||||||
|
|
||||||
|
def characters(self, data):
|
||||||
|
if not self.data:
|
||||||
|
self.data = data
|
||||||
|
else:
|
||||||
|
self.data += self.cdata_separator + data
|
||||||
|
|
||||||
|
def push_data(self, item, key, data):
|
||||||
|
if self.postprocessor is not None:
|
||||||
|
result = self.postprocessor(self.path, key, data)
|
||||||
|
if result is None:
|
||||||
|
return item
|
||||||
|
key, data = result
|
||||||
|
if item is None:
|
||||||
|
item = self.dict_constructor()
|
||||||
|
try:
|
||||||
|
value = item[key]
|
||||||
|
if isinstance(value, list):
|
||||||
|
value.append(data)
|
||||||
|
else:
|
||||||
|
item[key] = [value, data]
|
||||||
|
except KeyError:
|
||||||
|
item[key] = data
|
||||||
|
return item
|
||||||
|
|
||||||
|
|
||||||
|
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
|
||||||
|
namespace_separator=':', **kwargs):
|
||||||
|
"""Parse the given XML input and convert it into a dictionary.
|
||||||
|
|
||||||
|
`xml_input` can either be a `string` or a file-like object.
|
||||||
|
|
||||||
|
If `xml_attribs` is `True`, element attributes are put in the dictionary
|
||||||
|
among regular child elements, using `@` as a prefix to avoid collisions. If
|
||||||
|
set to `False`, they are just ignored.
|
||||||
|
|
||||||
|
Simple example::
|
||||||
|
|
||||||
|
>>> import xmltodict
|
||||||
|
>>> doc = xmltodict.parse(\"\"\"
|
||||||
|
... <a prop="x">
|
||||||
|
... <b>1</b>
|
||||||
|
... <b>2</b>
|
||||||
|
... </a>
|
||||||
|
... \"\"\")
|
||||||
|
>>> doc['a']['@prop']
|
||||||
|
u'x'
|
||||||
|
>>> doc['a']['b']
|
||||||
|
[u'1', u'2']
|
||||||
|
|
||||||
|
If `item_depth` is `0`, the function returns a dictionary for the root
|
||||||
|
element (default behavior). Otherwise, it calls `item_callback` every time
|
||||||
|
an item at the specified depth is found and returns `None` in the end
|
||||||
|
(streaming mode).
|
||||||
|
|
||||||
|
The callback function receives two parameters: the `path` from the document
|
||||||
|
root to the item (name-attribs pairs), and the `item` (dict). If the
|
||||||
|
callback's return value is false-ish, parsing will be stopped with the
|
||||||
|
:class:`ParsingInterrupted` exception.
|
||||||
|
|
||||||
|
Streaming example::
|
||||||
|
|
||||||
|
>>> def handle(path, item):
|
||||||
|
... print 'path:%s item:%s' % (path, item)
|
||||||
|
... return True
|
||||||
|
...
|
||||||
|
>>> xmltodict.parse(\"\"\"
|
||||||
|
... <a prop="x">
|
||||||
|
... <b>1</b>
|
||||||
|
... <b>2</b>
|
||||||
|
... </a>\"\"\", item_depth=2, item_callback=handle)
|
||||||
|
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
|
||||||
|
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
|
||||||
|
|
||||||
|
The optional argument `postprocessor` is a function that takes `path`,
|
||||||
|
`key` and `value` as positional arguments and returns a new `(key, value)`
|
||||||
|
pair where both `key` and `value` may have changed. Usage example::
|
||||||
|
|
||||||
|
>>> def postprocessor(path, key, value):
|
||||||
|
... try:
|
||||||
|
... return key + ':int', int(value)
|
||||||
|
... except (ValueError, TypeError):
|
||||||
|
... return key, value
|
||||||
|
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
|
||||||
|
... postprocessor=postprocessor)
|
||||||
|
OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
|
||||||
|
|
||||||
|
You can pass an alternate version of `expat` (such as `defusedexpat`) by
|
||||||
|
using the `expat` parameter. E.g:
|
||||||
|
|
||||||
|
>>> import defusedexpat
|
||||||
|
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
|
||||||
|
OrderedDict([(u'a', u'hello')])
|
||||||
|
|
||||||
|
"""
|
||||||
|
handler = _DictSAXHandler(namespace_separator=namespace_separator,
|
||||||
|
**kwargs)
|
||||||
|
if isinstance(xml_input, _unicode):
|
||||||
|
if not encoding:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
xml_input = xml_input.encode(encoding)
|
||||||
|
if not process_namespaces:
|
||||||
|
namespace_separator = None
|
||||||
|
parser = expat.ParserCreate(
|
||||||
|
encoding,
|
||||||
|
namespace_separator
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
parser.ordered_attributes = True
|
||||||
|
except AttributeError:
|
||||||
|
# Jython's expat does not support ordered_attributes
|
||||||
|
pass
|
||||||
|
parser.StartElementHandler = handler.startElement
|
||||||
|
parser.EndElementHandler = handler.endElement
|
||||||
|
parser.CharacterDataHandler = handler.characters
|
||||||
|
parser.buffer_text = True
|
||||||
|
try:
|
||||||
|
parser.ParseFile(xml_input)
|
||||||
|
except (TypeError, AttributeError):
|
||||||
|
parser.Parse(xml_input, True)
|
||||||
|
return handler.item
|
||||||
|
|
||||||
|
|
||||||
|
def _emit(key, value, content_handler,
|
||||||
|
attr_prefix='@',
|
||||||
|
cdata_key='#text',
|
||||||
|
depth=0,
|
||||||
|
preprocessor=None,
|
||||||
|
pretty=False,
|
||||||
|
newl='\n',
|
||||||
|
indent='\t'):
|
||||||
|
if preprocessor is not None:
|
||||||
|
result = preprocessor(key, value)
|
||||||
|
if result is None:
|
||||||
|
return
|
||||||
|
key, value = result
|
||||||
|
if not isinstance(value, (list, tuple)):
|
||||||
|
value = [value]
|
||||||
|
if depth == 0 and len(value) > 1:
|
||||||
|
raise ValueError('document with multiple roots')
|
||||||
|
for v in value:
|
||||||
|
if v is None:
|
||||||
|
v = OrderedDict()
|
||||||
|
elif not isinstance(v, dict):
|
||||||
|
v = _unicode(v)
|
||||||
|
if isinstance(v, _basestring):
|
||||||
|
v = OrderedDict(((cdata_key, v),))
|
||||||
|
cdata = None
|
||||||
|
attrs = OrderedDict()
|
||||||
|
children = []
|
||||||
|
for ik, iv in v.items():
|
||||||
|
if ik == cdata_key:
|
||||||
|
cdata = iv
|
||||||
|
continue
|
||||||
|
if ik.startswith(attr_prefix):
|
||||||
|
attrs[ik[len(attr_prefix):]] = iv
|
||||||
|
continue
|
||||||
|
children.append((ik, iv))
|
||||||
|
if pretty:
|
||||||
|
content_handler.ignorableWhitespace(depth * indent)
|
||||||
|
content_handler.startElement(key, AttributesImpl(attrs))
|
||||||
|
if pretty and children:
|
||||||
|
content_handler.ignorableWhitespace(newl)
|
||||||
|
for child_key, child_value in children:
|
||||||
|
_emit(child_key, child_value, content_handler,
|
||||||
|
attr_prefix, cdata_key, depth+1, preprocessor,
|
||||||
|
pretty, newl, indent)
|
||||||
|
if cdata is not None:
|
||||||
|
content_handler.characters(cdata)
|
||||||
|
if pretty and children:
|
||||||
|
content_handler.ignorableWhitespace(depth * indent)
|
||||||
|
content_handler.endElement(key)
|
||||||
|
if pretty and depth:
|
||||||
|
content_handler.ignorableWhitespace(newl)
|
||||||
|
|
||||||
|
|
||||||
|
def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
|
||||||
|
**kwargs):
|
||||||
|
"""Emit an XML document for the given `input_dict` (reverse of `parse`).
|
||||||
|
|
||||||
|
The resulting XML document is returned as a string, but if `output` (a
|
||||||
|
file-like object) is specified, it is written there instead.
|
||||||
|
|
||||||
|
Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
|
||||||
|
as XML node attributes, whereas keys equal to `cdata_key`
|
||||||
|
(default=`'#text'`) are treated as character data.
|
||||||
|
|
||||||
|
The `pretty` parameter (default=`False`) enables pretty-printing. In this
|
||||||
|
mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
|
||||||
|
can be customized with the `newl` and `indent` parameters.
|
||||||
|
|
||||||
|
"""
|
||||||
|
((key, value),) = input_dict.items()
|
||||||
|
must_return = False
|
||||||
|
if output is None:
|
||||||
|
output = StringIO()
|
||||||
|
must_return = True
|
||||||
|
content_handler = XMLGenerator(output, encoding)
|
||||||
|
if full_document:
|
||||||
|
content_handler.startDocument()
|
||||||
|
_emit(key, value, content_handler, **kwargs)
|
||||||
|
if full_document:
|
||||||
|
content_handler.endDocument()
|
||||||
|
if must_return:
|
||||||
|
value = output.getvalue()
|
||||||
|
try: # pragma no cover
|
||||||
|
value = value.decode(encoding)
|
||||||
|
except AttributeError: # pragma no cover
|
||||||
|
pass
|
||||||
|
return value
|
||||||
|
|
||||||
|
if __name__ == '__main__': # pragma: no cover
|
||||||
|
import sys
|
||||||
|
import marshal
|
||||||
|
|
||||||
|
(item_depth,) = sys.argv[1:]
|
||||||
|
item_depth = int(item_depth)
|
||||||
|
|
||||||
|
def handle_item(path, item):
|
||||||
|
marshal.dump((path, item), sys.stdout)
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
|
root = parse(sys.stdin,
|
||||||
|
item_depth=item_depth,
|
||||||
|
item_callback=handle_item,
|
||||||
|
dict_constructor=dict)
|
||||||
|
if item_depth == 0:
|
||||||
|
handle_item([], root)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
pass
|
|
@ -782,14 +782,10 @@ class GenericMetadata():
|
||||||
|
|
||||||
# Try and get posters and fanart from TMDB
|
# Try and get posters and fanart from TMDB
|
||||||
if image_url is None:
|
if image_url is None:
|
||||||
for show_name in set(allPossibleShowNames(show_obj)):
|
if image_type in ('poster', 'poster_thumb'):
|
||||||
if image_type in ('poster', 'poster_thumb'):
|
image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
|
||||||
image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
|
elif image_type == 'fanart':
|
||||||
elif image_type == 'fanart':
|
image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)
|
||||||
image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)
|
|
||||||
|
|
||||||
if image_url:
|
|
||||||
break
|
|
||||||
|
|
||||||
if image_url:
|
if image_url:
|
||||||
image_data = metadata_helpers.getShowImage(image_url, which)
|
image_data = metadata_helpers.getShowImage(image_url, which)
|
||||||
|
@ -965,8 +961,6 @@ class GenericMetadata():
|
||||||
return (indexer_id, name, indexer)
|
return (indexer_id, name, indexer)
|
||||||
|
|
||||||
def _retrieve_show_images_from_tmdb(self, show, backdrop=False, poster=False):
|
def _retrieve_show_images_from_tmdb(self, show, backdrop=False, poster=False):
|
||||||
tmdb_id = None
|
|
||||||
|
|
||||||
# get TMDB configuration info
|
# get TMDB configuration info
|
||||||
tmdb = TMDB(sickbeard.TMDB_API_KEY)
|
tmdb = TMDB(sickbeard.TMDB_API_KEY)
|
||||||
config = tmdb.Configuration()
|
config = tmdb.Configuration()
|
||||||
|
@ -981,27 +975,14 @@ class GenericMetadata():
|
||||||
|
|
||||||
try:
|
try:
|
||||||
search = tmdb.Search()
|
search = tmdb.Search()
|
||||||
for result in search.collection({'query': show.name}) + search.tv({'query': show.name}):
|
for show_name in set(allPossibleShowNames(show)):
|
||||||
tmdb_id = result['id']
|
for result in search.collection({'query': show_name})['results'] + search.tv({'query': show_name})['results']:
|
||||||
external_ids = tmdb.TV(tmdb_id).external_ids()
|
if backdrop and result['backdrop_path']:
|
||||||
if show.indexerid in [external_ids['tvdb_id'], external_ids['tvrage_id']]:
|
return "{0}{1}{2}".format(base_url, max_size, result['backdrop_path'])
|
||||||
break
|
elif poster and result['poster_path']:
|
||||||
|
return "{0}{1}{2}".format(base_url, max_size, result['poster_path'])
|
||||||
|
|
||||||
if tmdb_id:
|
except Exception, e:
|
||||||
images = tmdb.Collections(tmdb_id).images()
|
|
||||||
if len(images) > 0:
|
|
||||||
# get backdrop urls
|
|
||||||
if backdrop:
|
|
||||||
rel_path = images['backdrops'][0]['file_path']
|
|
||||||
url = "{0}{1}{2}".format(base_url, max_size, rel_path)
|
|
||||||
return url
|
|
||||||
|
|
||||||
# get poster urls
|
|
||||||
if poster:
|
|
||||||
rel_path = images['posters'][0]['file_path']
|
|
||||||
url = "{0}{1}{2}".format(base_url, max_size, rel_path)
|
|
||||||
return url
|
|
||||||
except:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
logger.log(u"Could not find any posters or background for " + show.name, logger.DEBUG)
|
logger.log(u"Could not find any posters or background for " + show.name, logger.DEBUG)
|
|
@ -829,7 +829,7 @@ class TVShow(object):
|
||||||
self.airs = myEp["airs_dayofweek"] + " " + myEp["airs_time"]
|
self.airs = myEp["airs_dayofweek"] + " " + myEp["airs_time"]
|
||||||
|
|
||||||
if getattr(myEp, 'firstaired', None) is not None:
|
if getattr(myEp, 'firstaired', None) is not None:
|
||||||
self.startyear = int(myEp["firstaired"].split('-')[0])
|
self.startyear = int(str(myEp["firstaired"]).split('-')[0])
|
||||||
|
|
||||||
self.status = getattr(myEp, 'status', '')
|
self.status = getattr(myEp, 'status', '')
|
||||||
|
|
||||||
|
@ -855,7 +855,6 @@ class TVShow(object):
|
||||||
i = imdb.IMDb()
|
i = imdb.IMDb()
|
||||||
imdbTv = i.get_movie(str(re.sub("[^0-9]", "", self.imdbid)))
|
imdbTv = i.get_movie(str(re.sub("[^0-9]", "", self.imdbid)))
|
||||||
|
|
||||||
test = imdbTv.keys()
|
|
||||||
for key in filter(lambda x: x.replace('_', ' ') in imdbTv.keys(), imdb_info.keys()):
|
for key in filter(lambda x: x.replace('_', ' ') in imdbTv.keys(), imdb_info.keys()):
|
||||||
# Store only the first value for string type
|
# Store only the first value for string type
|
||||||
if type(imdb_info[key]) == type('') and type(imdbTv.get(key)) == type([]):
|
if type(imdb_info[key]) == type('') and type(imdbTv.get(key)) == type([]):
|
||||||
|
@ -1556,7 +1555,7 @@ class TVEpisode(object):
|
||||||
self.deleteEpisode()
|
self.deleteEpisode()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if myEp["absolute_number"] == None or myEp["absolute_number"] == "":
|
if getattr(myEp, 'absolute_number', None) is None:
|
||||||
logger.log(u"This episode (" + self.show.name + " - " + str(season) + "x" + str(
|
logger.log(u"This episode (" + self.show.name + " - " + str(season) + "x" + str(
|
||||||
episode) + ") has no absolute number on " + sickbeard.indexerApi(
|
episode) + ") has no absolute number on " + sickbeard.indexerApi(
|
||||||
self.indexer).name
|
self.indexer).name
|
||||||
|
@ -1564,7 +1563,7 @@ class TVEpisode(object):
|
||||||
else:
|
else:
|
||||||
logger.log(
|
logger.log(
|
||||||
str(self.show.indexerid) + ": The absolute_number for " + str(season) + "x" + str(episode) + " is : " +
|
str(self.show.indexerid) + ": The absolute_number for " + str(season) + "x" + str(episode) + " is : " +
|
||||||
myEp["absolute_number"], logger.DEBUG)
|
str(myEp["absolute_number"]), logger.DEBUG)
|
||||||
self.absolute_number = int(myEp["absolute_number"])
|
self.absolute_number = int(myEp["absolute_number"])
|
||||||
|
|
||||||
self.name = getattr(myEp, 'episodename', "")
|
self.name = getattr(myEp, 'episodename', "")
|
||||||
|
@ -1603,8 +1602,9 @@ class TVEpisode(object):
|
||||||
u"The show dir is missing, not bothering to change the episode statuses since it'd probably be invalid")
|
u"The show dir is missing, not bothering to change the episode statuses since it'd probably be invalid")
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
|
if self.location:
|
||||||
episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)
|
logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
|
||||||
|
episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)
|
||||||
|
|
||||||
if not ek.ek(os.path.isfile, self.location):
|
if not ek.ek(os.path.isfile, self.location):
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue