Update imdbpy libs to v5.0
Fixed invalid indexer ID issues for TVRage shows. Fixed issues with fetching posters and backdrops for TVRage shows. The indexer APIs now convert XML responses straight into dict objects, improving overall API performance. Fixed TVRage shows not displaying genres properly.
This commit is contained in:
parent
764cf6e62e
commit
2dcd26e69c
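
The indexer change in this commit replaces manual ElementTree traversal with xmltodict, which parses the XML response straight into a dict. A minimal sketch of that pattern, assuming xmltodict is installed (the XML payload and values below are illustrative, not a real thetvdb.com response):

    import xmltodict

    def process(path, key, value):
        # Lowercase tag names and coerce digit strings to ints,
        # mirroring the postprocessor added to tvdb_api in this commit.
        key = key.lower()
        if value and value.isdigit():
            value = int(value)
        return key, value

    xml = '<Data><Series><id>71663</id><SeriesName>The Simpsons</SeriesName></Series></Data>'
    data = xmltodict.parse(xml, postprocessor=process)
    print(data['data']['series']['id'])          # 71663, as an int
    print(data['data']['series']['seriesname'])  # The Simpsons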
@@ -6,7 +6,7 @@ a person from the IMDb database.
 It can fetch data through different media (e.g.: the IMDb web pages,
 a SQL database, etc.)
 
-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2014 Davide Alberani <da@erlug.linux.it>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

@@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
 __all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
            'available_access_systems']
-__version__ = VERSION = '4.9'
+__version__ = VERSION = '5.0'
 
 # Import compatibility module (importing it is enough).
 import _compat

@@ -160,6 +160,7 @@ def IMDb(accessSystem=None, *arguments, **keywords):
         kwds.update(keywords)
         keywords = kwds
     except Exception, e:
+        import logging
         logging.getLogger('imdbpy').warn('Unable to read configuration' \
                                          ' file; complete error: %s' % e)
     # It just LOOKS LIKE a bad habit: we tried to read config

@@ -303,7 +304,7 @@ class IMDbBase:
     # http://akas.imdb.com/keyword/%s/
     imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
     # http://akas.imdb.com/chart/top
-    imdbURL_top250=imdbURL_base + 'chart/top',
+    imdbURL_top250=imdbURL_base + 'chart/top'
     # http://akas.imdb.com/chart/bottom
     imdbURL_bottom100=imdbURL_base + 'chart/bottom'
     # http://akas.imdb.com/find?%s

@@ -824,22 +825,23 @@ class IMDbBase:
         # subclass, somewhere under the imdb.parser package.
         raise NotImplementedError('override this method')
 
-    def _searchIMDb(self, kind, ton):
+    def _searchIMDb(self, kind, ton, title_kind=None):
         """Search the IMDb akas server for the given title or name."""
         # The Exact Primary search system has gone AWOL, so we resort
         # to the mobile search. :-/
         if not ton:
             return None
+        ton = ton.strip('"')
         aSystem = IMDb('mobile')
         if kind == 'tt':
             searchFunct = aSystem.search_movie
-            check = 'long imdb canonical title'
+            check = 'long imdb title'
         elif kind == 'nm':
             searchFunct = aSystem.search_person
-            check = 'long imdb canonical name'
+            check = 'long imdb name'
         elif kind == 'char':
             searchFunct = aSystem.search_character
-            check = 'long imdb canonical name'
+            check = 'long imdb name'
         elif kind == 'co':
             # XXX: are [COUNTRY] codes included in the results?
             searchFunct = aSystem.search_company

@@ -852,24 +854,42 @@ class IMDbBase:
         # exact match.
         if len(searchRes) == 1:
             return searchRes[0].getID()
+        title_only_matches = []
         for item in searchRes:
             # Return the first perfect match.
-            if item[check] == ton:
+            if item[check].strip('"') == ton:
+                # For titles do additional check for kind
+                if kind != 'tt' or title_kind == item['kind']:
                     return item.getID()
+                elif kind == 'tt':
+                    title_only_matches.append(item.getID())
+        # imdbpy2sql.py could detected wrong type, so if no title and kind
+        # matches found - collect all results with title only match
+        # Return list of IDs if multiple matches (can happen when searching
+        # titles with no title_kind specified)
+        # Example: DB: Band of Brothers "tv series" vs "tv mini-series"
+        if title_only_matches:
+            if len(title_only_matches) == 1:
+                return title_only_matches[0]
+            else:
+                return title_only_matches
         return None
 
-    def title2imdbID(self, title):
+    def title2imdbID(self, title, kind=None):
         """Translate a movie title (in the plain text data files format)
         to an imdbID.
         Try an Exact Primary Title search on IMDb;
-        return None if it's unable to get the imdbID."""
-        return self._searchIMDb('tt', title)
+        return None if it's unable to get the imdbID;
+        Always specify kind: movie, tv series, video game etc. or search can
+        return list of IDs if multiple matches found
+        """
+        return self._searchIMDb('tt', title, kind)
 
     def name2imdbID(self, name):
         """Translate a person name in an imdbID.
         Try an Exact Primary Name search on IMDb;
         return None if it's unable to get the imdbID."""
-        return self._searchIMDb('tt', name)
+        return self._searchIMDb('nm', name)
 
     def character2imdbID(self, name):
         """Translate a character name in an imdbID.

@@ -896,7 +916,8 @@ class IMDbBase:
                 imdbID = aSystem.get_imdbMovieID(mop.movieID)
             else:
                 imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
-                                                          ptdf=1))
+                                                          ptdf=0, appendKind=False),
+                                              mop['kind'])
         elif isinstance(mop, Person.Person):
             if mop.personID is not None:
                 imdbID = aSystem.get_imdbPersonID(mop.personID)
@@ -29,7 +29,7 @@
 
 [imdbpy]
 ## Default.
-accessSystem = mobile
+accessSystem = http
 
 ## Optional (options common to every data access system):
 # Activate adult searches (on, by default).

@@ -37,7 +37,7 @@ accessSystem = mobile
 # Number of results for searches (20 by default).
 #results = 20
 # Re-raise all caught exceptions (off, by default).
-reraiseExceptions = on
+#reraiseExceptions = off
 
 ## Optional (options common to http and mobile data access systems):
 # Proxy used to access the network. If it requires authentication,

@@ -69,7 +69,7 @@ reraiseExceptions = on
 ## Set the threshold for logging messages.
 # Can be one of "debug", "info", "warning", "error", "critical" (default:
 # "warning").
-loggingLevel = info
+#loggingLevel = debug
 
 ## Path to a configuration file for the logging facility;
 # see: http://docs.python.org/library/logging.html#configuring-logging
@@ -64,8 +64,10 @@ LANG_ARTICLES = {
     'English': ('the', 'a', 'an'),
     'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
                 'uno'),
-    'Spanish': ('la', 'le', 'el', 'les', 'un', 'los', 'una', 'uno', 'unos',
-                'unas'),
+    'Spanish': ('la', 'lo', 'el', 'las', 'un', 'los', 'una', 'al', 'del',
+                'unos', 'unas', 'uno'),
+    'French': ('le', "l'", 'la', 'les', 'un', 'une', 'des', 'au', 'du', '\xc3\xa0 la',
+               'de la', 'aux'),
     'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
     'Turkish': (), # Some languages doesn't have articles.
 }
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 """
 generatepot.py script.
 
File diff suppressed because it is too large (5 files)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 # -*- coding: iso-8859-1 -*-
 """Generate binary message catalog from textual translation description.
 
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 """
 rebuildmo.py script.
 
@@ -104,15 +104,24 @@ PY_VERSION = sys.version_info[:2]
 # The cookies for the "adult" search.
 # Please don't mess with these account.
 # Old 'IMDbPY' account.
-_old_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
-_old_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
-# New 'IMDbPYweb' account.
-_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
-_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
+_IMDbPY_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
+_IMDbPY_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
+# 'imdbpy2010' account.
+_imdbpy2010_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
+_imdbpy2010_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
+# old 'IMDbPYweb' account.
+_old_IMDbPYweb_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
+_old_IMDbPYweb_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
+# old 'IMDbPYweb' account values (as of 2012-12-30)
+_IMDbPYweb_cookie_id = 'BCYjtpb46Go0cMHAMewWZEauhwqPL7ASCPpPVNutu6BuayHZd0U6Dk3UAqVlEM8DHLDsSr02RGQn5ff3245-R4A130NAWJ_5yqXx7X-zJey8vQM8JKdv3rTUSEJznJQlojUW1Bije-Q0FXAixs4I0sePWhd_tA41i-9AF2q3lPmaksram6ilMhN9i3IPESW1PMbk'
+_IMDbPYweb_cookie_uu = 'BCYttQjEMc-NyUdFUGxThidAnBo7wwalEzj4un9uzf2XoEjtqDhNfrH7bOSuwlRkMEQ11SNyTajl-b9Q-21m4HwYu0e3jXZrjYLXLYzFkrEroCDyUREqaTwPJPSjGtFmvlaVBZEZmsWpaxe18DT5KiygKyGPZKH78Xu4im6ba-Sd31WvbXHzP8KGXPpGjhhVuv7Dcv314HCWkE832Srf9ya-Uv0FdGAmYyLbIAXuxnvpYQd6oZ8-CYkSGLIqcKWdrf5S'
+# 'IMDbPY2013' account
+_IMDbPY2013_cookie_id = 'BCYmoyqSm2WglmOzG-SrFWSvVpxsTZOB0qEOOqmAwCBxCbaNgKOxd0DTKzUvt7t04Pya5gV2tUrpDmYxrc1Dr54DQj2UXI7QI35__M5-HI2KrbOI3PjDz6M-_U3HG8topMfN64R24tmBixoZhMYXVaEc556lf0Z4gQNJVYRANXvwytP5v1lpfeToRlu9aVJwN4kT'
+_IMDbPY2013_cookie_uu = 'BCYquDS8Y2i8R1pJxS4nB77YrhjHHXeOea2Xl9KtZvE6RZKVfMvzTGU4Vl5-yxfPbgRSiFJasyf-hhPuVvXyaHlfeBjNlbFT8hz2HzFFkQ_SxKxq05J51gi7Fv4SaAws1M-i7zmQ1TRunfJqCVIYqPwIs2NO7s4_YDH2ZoISVGLgca8OY2K58HychOZB1oRWHVeAJNhLJMrCWJBuGRLCNnQK5X9tA0dPPntr2Ussy0ouul-N1GQz-8y5vda3JJ_C6xkwmHcA6JrOdOFO_HqMWjVSXuxGEdrXC919JM9H0vooVvKeVgAEJnTh2GiVlUJUoH3c'
 
-# imdbpy2010 account.
-#_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
-#_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
+# Currently used account.
+_cookie_id = _IMDbPY2013_cookie_id
+_cookie_uu = _IMDbPY2013_cookie_uu
 
 
 class _FakeURLOpener(object):
@@ -141,9 +150,10 @@ class IMDbURLopener(FancyURLopener):
         for header in ('User-Agent', 'User-agent', 'user-agent'):
             self.del_header(header)
         self.set_header('User-Agent', 'Mozilla/5.0')
+        self.set_header('Accept-Language', 'en-us,en;q=0.5')
         # XXX: This class is used also to perform "Exact Primary
         # [Title|Name]" searches, and so by default the cookie is set.
-        c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu)
+        c_header = 'uu=%s; id=%s' % (_cookie_uu, _cookie_id)
         self.set_header('Cookie', c_header)
 
     def get_proxy(self):
@@ -199,12 +209,11 @@ class IMDbURLopener(FancyURLopener):
             server_encode = uopener.info().getparam('charset')
         # Otherwise, look at the content-type HTML meta tag.
         if server_encode is None and content:
-            first_bytes = content[:512]
-            begin_h = first_bytes.find('text/html; charset=')
+            begin_h = content.find('text/html; charset=')
             if begin_h != -1:
-                end_h = first_bytes[19+begin_h:].find('"')
+                end_h = content[19+begin_h:].find('"')
                 if end_h != -1:
-                    server_encode = first_bytes[19+begin_h:19+begin_h+end_h]
+                    server_encode = content[19+begin_h:19+begin_h+end_h]
         if server_encode:
             try:
                 if lookup(server_encode):
@@ -455,16 +464,16 @@ class IMDbHTTPAccessSystem(IMDbBase):
         results is the maximum number of results to be retrieved."""
         if isinstance(ton, unicode):
             try:
-                ton = ton.encode('iso8859-1')
+                ton = ton.encode('utf-8')
             except Exception, e:
                 try:
-                    ton = ton.encode('utf-8')
+                    ton = ton.encode('iso8859-1')
                 except Exception, e:
                     pass
         ##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
-        params = 'q=%s;s=%s;mx=%s' % (quote_plus(ton), kind, str(results))
+        params = 'q=%s&s=%s&mx=%s' % (quote_plus(ton), kind, str(results))
         if kind == 'ep':
-            params = params.replace('s=ep;', 's=tt;ttype=ep;', 1)
+            params = params.replace('s=ep&', 's=tt&ttype=ep&', 1)
         cont = self._retrieve(self.urls['find'] % params)
         #print 'URL:', imdbURL_find % params
         if cont.find('Your search returned more than') == -1 or \
@@ -472,7 +481,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
             return cont
         # The retrieved page contains no results, because too many
         # titles or names contain the string we're looking for.
-        params = 'q=%s;ls=%s;lm=0' % (quote_plus(ton), kind)
+        params = 'q=%s&ls=%s&lm=0' % (quote_plus(ton), kind)
         size = 131072 + results * 512
         return self._retrieve(self.urls['find'] % params, size=size)
 
@@ -587,6 +596,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
         cont = self._retrieve(self.urls['movie_main'] % movieID + 'recommendations')
         return self.mProxy.rec_parser.parse(cont)
 
+    def get_movie_critic_reviews(self, movieID):
+        cont = self._retrieve(self.urls['movie_main'] % movieID + 'criticreviews')
+        return self.mProxy.criticrev_parser.parse(cont)
+
     def get_movie_external_reviews(self, movieID):
         cont = self._retrieve(self.urls['movie_main'] % movieID + 'externalreviews')
         return self.mProxy.externalrev_parser.parse(cont)
@@ -754,7 +767,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
         return self.pProxy.person_keywords_parser.parse(cont)
 
     def _search_character(self, name, results):
-        cont = self._get_search_content('char', name, results)
+        cont = self._get_search_content('ch', name, results)
         return self.scProxy.search_character_parser.parse(cont, results=results)['data']
 
     def get_character_main(self, characterID):
@@ -9,7 +9,7 @@ pages would be:
 plot summary: http://akas.imdb.com/title/tt0094226/plotsummary
 ...and so on...
 
-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -531,9 +531,6 @@ class DOMHTMLMovieParser(DOMParserBase):
 def _process_plotsummary(x):
     """Process a plot (contributed by Rdian06)."""
     xauthor = x.get('author')
-    if xauthor:
-        xauthor = xauthor.replace('{', '<').replace('}', '>').replace('(',
-                                  '<').replace(')', '>').strip()
     xplot = x.get('plot', u'').strip()
     if xauthor:
         xplot += u'::%s' % xauthor
@@ -555,17 +552,20 @@ class DOMHTMLPlotParser(DOMParserBase):
     # Notice that recently IMDb started to put the email of the
     # author only in the link, that we're not collecting, here.
     extractors = [Extractor(label='plot',
-                            path="//p[@class='plotpar']",
+                            path="//ul[@class='zebraList']//p",
                             attrs=Attribute(key='plot',
                                             multi=True,
-                                            path={'plot': './text()',
-                                                  'author': './i/a/text()'},
+                                            path={'plot': './text()[1]',
+                                                  'author': './span/em/a/text()'},
                                             postprocess=_process_plotsummary))]
 
 
 def _process_award(x):
     award = {}
-    award['award'] = x.get('award').strip()
+    _award = x.get('award')
+    if _award is not None:
+        _award = _award.strip()
+    award['award'] = _award
     if not award['award']:
         return {}
     award['year'] = x.get('year').strip()
@@ -709,10 +709,16 @@ class DOMHTMLTaglinesParser(DOMParserBase):
         result = tparser.parse(taglines_html_string)
     """
     extractors = [Extractor(label='taglines',
-                            path="//div[@id='tn15content']/p",
-                            attrs=Attribute(key='taglines', multi=True,
+                            path='//*[contains(concat(" ", normalize-space(@class), " "), " soda ")]',
+                            attrs=Attribute(key='taglines',
+                                            multi=True,
                                             path="./text()"))]
 
+    def postprocess_data(self, data):
+        if 'taglines' in data:
+            data['taglines'] = [tagline.strip() for tagline in data['taglines']]
+        return data
+
 
 class DOMHTMLKeywordsParser(DOMParserBase):
     """Parser for the "keywords" page of a given movie.
@@ -785,9 +791,9 @@ class DOMHTMLSoundtrackParser(DOMHTMLAlternateVersionsParser):
         ]
 
     def postprocess_data(self, data):
-        if 'soundtrack' in data:
+        if 'alternate versions' in data:
             nd = []
-            for x in data['soundtrack']:
+            for x in data['alternate versions']:
                 ds = x.split('\n')
                 title = ds[0]
                 if title[0] == '"' and title[-1] == '"':
@@ -846,6 +852,13 @@ class DOMHTMLCrazyCreditsParser(DOMParserBase):
                     x.replace('\n', ' ').replace('  ', ' ')))]
 
 
+def _process_goof(x):
+    if x['spoiler_category']:
+        return x['spoiler_category'].strip() + ': SPOILER: ' + x['text'].strip()
+    else:
+        return x['category'].strip() + ': ' + x['text'].strip()
+
+
 class DOMHTMLGoofsParser(DOMParserBase):
     """Parser for the "goofs" page of a given movie.
     The page should be provided as a string, as taken from
@@ -858,9 +871,14 @@ class DOMHTMLGoofsParser(DOMParserBase):
     """
     _defGetRefs = True
 
-    extractors = [Extractor(label='goofs', path="//ul[@class='trivia']/li",
-                            attrs=Attribute(key='goofs', multi=True, path=".//text()",
-                                            postprocess=lambda x: (x or u'').strip()))]
+    extractors = [Extractor(label='goofs', path="//div[@class='soda odd']",
+                            attrs=Attribute(key='goofs', multi=True,
+                                            path={
+                                                'text':"./text()",
+                                                'category':'./preceding-sibling::h4[1]/text()',
+                                                'spoiler_category': './h4/text()'
+                                            },
+                                            postprocess=_process_goof))]
 
 
 class DOMHTMLQuotesParser(DOMParserBase):
@@ -876,9 +894,16 @@ class DOMHTMLQuotesParser(DOMParserBase):
     _defGetRefs = True
 
     extractors = [
-        Extractor(label='quotes',
-                  path="//div[@class='_imdbpy']",
-                  attrs=Attribute(key='quotes',
+        Extractor(label='quotes_odd',
+                  path="//div[@class='quote soda odd']",
+                  attrs=Attribute(key='quotes_odd',
                                   multi=True,
                                   path=".//text()",
                                   postprocess=lambda x: x.strip().replace(' \n',
                                       '::').replace('::\n', '::').replace('\n', ' '))),
+        Extractor(label='quotes_even',
+                  path="//div[@class='quote soda even']",
+                  attrs=Attribute(key='quotes_even',
+                                  multi=True,
+                                  path=".//text()",
+                                  postprocess=lambda x: x.strip().replace(' \n',
@@ -886,27 +911,23 @@ class DOMHTMLQuotesParser(DOMParserBase):
     ]
 
     preprocessors = [
-        (re.compile('(<a name="?qt[0-9]{7}"?></a>)', re.I),
-         r'\1<div class="_imdbpy">'),
-        (re.compile('<hr width="30%">', re.I), '</div>'),
-        (re.compile('<hr/>', re.I), '</div>'),
         (re.compile('<script.*?</script>', re.I|re.S), ''),
-        # For BeautifulSoup.
-        (re.compile('<!-- sid: t-channel : MIDDLE_CENTER -->', re.I), '</div>')
+        (re.compile('<a href="#" class="hidesoda hidden">Hide options</a><br>', re.I), '')
     ]
 
     def preprocess_dom(self, dom):
         # Remove "link this quote" links.
-        for qLink in self.xpath(dom, "//p[@class='linksoda']"):
+        for qLink in self.xpath(dom, "//span[@class='linksoda']"):
             qLink.drop_tree()
+        for qLink in self.xpath(dom, "//div[@class='sharesoda_pre']"):
+            qLink.drop_tree()
         return dom
 
     def postprocess_data(self, data):
-        if 'quotes' not in data:
+        quotes = data.get('quotes_odd', []) + data.get('quotes_even', [])
+        if not quotes:
             return {}
-        for idx, quote in enumerate(data['quotes']):
-            data['quotes'][idx] = quote.split('::')
-        return data
+        quotes = [q.split('::') for q in quotes]
+        return {'quotes': quotes}
 
 
 class DOMHTMLReleaseinfoParser(DOMParserBase):
@@ -920,13 +941,13 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
         result = rdparser.parse(releaseinfo_html_string)
     """
     extractors = [Extractor(label='release dates',
-                            path="//th[@class='xxxx']/../../tr",
+                            path="//table[@id='release_dates']//tr",
                             attrs=Attribute(key='release dates', multi=True,
                                             path={'country': ".//td[1]//text()",
                                                   'date': ".//td[2]//text()",
                                                   'notes': ".//td[3]//text()"})),
                  Extractor(label='akas',
-                           path="//div[@class='_imdbpy_akas']/table/tr",
+                           path="//table[@id='akas']//tr",
                            attrs=Attribute(key='akas', multi=True,
                                            path={'title': "./td[1]/text()",
                                                  'countries': "./td[2]/text()"}))]
@@ -961,7 +982,7 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
             title = (aka.get('title') or '').strip()
             if not title:
                 continue
-            countries = (aka.get('countries') or '').split('/')
+            countries = (aka.get('countries') or '').split(',')
             if not countries:
                 nakas.append(title)
             else:
@@ -1135,6 +1156,27 @@ def _normalize_href(href):
         href = '%s%s' % (imdbURL_base, href)
     return href
 
+class DOMHTMLCriticReviewsParser(DOMParserBase):
+    """Parser for the "critic reviews" pages of a given movie.
+    The page should be provided as a string, as taken from
+    the akas.imdb.com server. The final result will be a
+    dictionary, with a key for every relevant section.
+
+    Example:
+        osparser = DOMHTMLCriticReviewsParser()
+        result = osparser.parse(officialsites_html_string)
+    """
+    kind = 'critic reviews'
+
+    extractors = [
+        Extractor(label='metascore',
+                  path="//div[@class='metascore_wrap']/div/span",
+                  attrs=Attribute(key='metascore',
+                                  path=".//text()")),
+        Extractor(label='metacritic url',
+                  path="//div[@class='article']/div[@class='see-more']/a",
+                  attrs=Attribute(key='metacritic url',
+                                  path="./@href")) ]
+
 class DOMHTMLOfficialsitesParser(DOMParserBase):
     """Parser for the "official sites", "external reviews", "newsgroup
@@ -1471,6 +1513,14 @@ class DOMHTMLSeasonEpisodesParser(DOMParserBase):
         try: selected_season = int(selected_season)
         except: pass
         nd = {selected_season: {}}
+        if 'episode -1' in data:
+            counter = 1
+            for episode in data['episode -1']:
+                while 'episode %d' % counter in data:
+                    counter += 1
+                k = 'episode %d' % counter
+                data[k] = [episode]
+            del data['episode -1']
         for episode_nr, episode in data.iteritems():
             if not (episode and episode[0] and
                     episode_nr.startswith('episode ')):
@@ -1860,6 +1910,8 @@ _OBJECTS = {
     'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
     'ratings_parser': ((DOMHTMLRatingsParser,), None),
     'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
+    'criticrev_parser': ((DOMHTMLCriticReviewsParser,),
+                         {'kind': 'critic reviews'}),
     'externalrev_parser': ((DOMHTMLOfficialsitesParser,),
                            {'kind': 'external reviews'}),
     'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,),
@@ -8,7 +8,7 @@ E.g., for "Mel Gibson" the referred pages would be:
 biography: http://akas.imdb.com/name/nm0000154/bio
 ...and so on...
 
-Copyright 2004-20101 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -60,6 +60,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
         result = cparser.parse(categorized_html_string)
     """
     _containsObjects = True
+    _name_imdb_index = re.compile(r'\([IVXLCDM]+\)')
 
     _birth_attrs = [Attribute(key='birth date',
                               path='.//time[@itemprop="birthDate"]/@datetime'),
@@ -100,6 +101,10 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
                               path=".//text()",
                               postprocess=lambda x: analyze_name(x,
                                                                  canonical=1))),
+            Extractor(label='name_index',
+                      path="//h1[@class='header']/span[1]",
+                      attrs=Attribute(key='name_index',
+                                      path="./text()")),
 
             Extractor(label='birth info',
                       path="//div[h4='Born:']",
@@ -110,7 +115,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
                       attrs=_death_attrs),
 
             Extractor(label='headshot',
-                      path="//td[@id='img_primary']/a",
+                      path="//td[@id='img_primary']/div[@class='image']/a",
                       attrs=Attribute(key='headshot',
                                       path="./img/@src")),
 
@@ -152,6 +157,11 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
         for what in 'birth date', 'death date':
             if what in data and not data[what]:
                 del data[what]
+        name_index = (data.get('name_index') or '').strip()
+        if name_index:
+            if self._name_imdb_index.match(name_index):
+                data['imdbIndex'] = name_index[1:-1]
+            del data['name_index']
         # XXX: the code below is for backwards compatibility
         # probably could be removed
         for key in data.keys():
@@ -220,13 +230,13 @@ class DOMHTMLBioParser(DOMParserBase):
                       attrs=Attribute(key='headshot',
                                       path="./img/@src")),
             Extractor(label='birth info',
-                      path="//div[h5='Date of Birth']",
+                      path="//table[@id='overviewTable']//td[text()='Date of Birth']/following-sibling::td[1]",
                       attrs=_birth_attrs),
             Extractor(label='death info',
-                      path="//div[h5='Date of Death']",
+                      path="//table[@id='overviewTable']//td[text()='Date of Death']/following-sibling::td[1]",
                       attrs=_death_attrs),
             Extractor(label='nick names',
-                      path="//div[h5='Nickname']",
+                      path="//table[@id='overviewTable']//td[text()='Nickenames']/following-sibling::td[1]",
                       attrs=Attribute(key='nick names',
                                       path="./text()",
                                       joiner='|',
@@ -234,25 +244,25 @@ class DOMHTMLBioParser(DOMParserBase):
                                           '::(', 1) for n in x.split('|')
                                           if n.strip()])),
             Extractor(label='birth name',
-                      path="//div[h5='Birth Name']",
+                      path="//table[@id='overviewTable']//td[text()='Birth Name']/following-sibling::td[1]",
                       attrs=Attribute(key='birth name',
                                       path="./text()",
                                       postprocess=lambda x: canonicalName(x.strip()))),
             Extractor(label='height',
-                      path="//div[h5='Height']",
+                      path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
                       attrs=Attribute(key='height',
                                       path="./text()",
                                       postprocess=lambda x: x.strip())),
             Extractor(label='mini biography',
-                      path="//div[h5='Mini Biography']",
+                      path="//a[@name='mini_bio']/following-sibling::div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
                       attrs=Attribute(key='mini biography',
                                       multi=True,
                                       path={
-                                          'bio': "./p//text()",
-                                          'by': "./b/following-sibling::a/text()"
+                                          'bio': ".//text()",
+                                          'by': ".//a[@name='ba']//text()"
                                       },
                                       postprocess=lambda x: "%s::%s" % \
-                                          (x.get('bio').strip(),
+                                          ((x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
                                            (x.get('by') or u'').strip() or u'Anonymous'))),
             Extractor(label='spouse',
                       path="//div[h5='Spouse']/table/tr",
@@ -5,9 +5,9 @@ This module provides the HTMLSearchCharacterParser class (and the
 search_character_parser instance), used to parse the results of a search
 for a given character.
 E.g., when searching for the name "Jesse James", the parsed page would be:
-    http://akas.imdb.com/find?s=Characters;mx=20;q=Jesse+James
+    http://akas.imdb.com/find?s=ch;mx=20;q=Jesse+James
 
-Copyright 2007-2009 Davide Alberani <da@erlug.linux.it>
+Copyright 2007-2012 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@ class DOMBasicCharacterParser(DOMBasicMovieParser):
 
 class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
     _BaseParser = DOMBasicCharacterParser
-    _notDirectHitTitle = '<title>imdb search'
+    _notDirectHitTitle = '<title>find - imdb'
     _titleBuilder = lambda self, x: build_name(x, canonical=False)
     _linkPrefix = '/character/ch'
 
@@ -57,7 +57,7 @@ class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
                                          {'name': x.get('name')}
                               ))]
     extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, " \
+                            path="//td[@class='result_text']/a[starts-with(@href, " \
                                     "'/character/ch')]/..",
                             attrs=_attrs)]
 
@@ -7,7 +7,7 @@ for a given company.
 E.g., when searching for the name "Columbia Pictures", the parsed page would be:
     http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
 
-Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
+Copyright 2008-2012 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@ class DOMBasicCompanyParser(DOMBasicMovieParser):
 
 class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
     _BaseParser = DOMBasicCompanyParser
-    _notDirectHitTitle = '<title>imdb company'
+    _notDirectHitTitle = '<title>find - imdb'
     _titleBuilder = lambda self, x: build_company_name(x)
     _linkPrefix = '/company/co'
 
@@ -59,7 +59,7 @@ class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
                                  or u''), stripNotes=True)
                               ))]
     extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, " \
+                            path="//td[@class='result_text']/a[starts-with(@href, " \
                                     "'/company/co')]/..",
                             attrs=_attrs)]
 
@@ -8,7 +8,7 @@ E.g., for when searching for the title "the passion", the parsed
 page would be:
     http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
 
-Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -77,7 +77,7 @@ class DOMBasicMovieParser(DOMParserBase):
 def custom_analyze_title(title):
     """Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
     # XXX: very crappy. :-(
-    nt = title.split(' ')[0]
+    nt = title.split(' aka ')[0]
     if nt:
         title = nt
     if not title:
@@ -92,7 +92,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
     "new search system" is used, for movies."""
 
     _BaseParser = DOMBasicMovieParser
-    _notDirectHitTitle = '<title>imdb title'
+    _notDirectHitTitle = '<title>find - imdb</title>'
     _titleBuilder = lambda self, x: build_title(x)
     _linkPrefix = '/title/tt'
 
@@ -101,8 +101,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
                     path={
                         'link': "./a[1]/@href",
                         'info': ".//text()",
-                        #'akas': ".//div[@class='_imdbpyAKA']//text()"
-                        'akas': ".//p[@class='find-aka']//text()"
+                        'akas': "./i//text()"
                     },
                     postprocess=lambda x: (
                         analyze_imdbid(x.get('link') or u''),
@@ -110,7 +109,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
                         x.get('akas')
                     ))]
     extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, '/title/tt')]/..",
+                            path="//td[@class='result_text']",
                             attrs=_attrs)]
     def _init(self):
         self.url = u''
@@ -119,14 +118,11 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
         self.url = u''
 
     def preprocess_string(self, html_string):
-        if self._notDirectHitTitle in html_string[:1024].lower():
+        if self._notDirectHitTitle in html_string[:10240].lower():
             if self._linkPrefix == '/title/tt':
                 # Only for movies.
+                # XXX (HTU): does this still apply?
                 html_string = html_string.replace('(TV mini-series)', '(mini)')
-                html_string = html_string.replace('<p class="find-aka">',
-                                                  '<p class="find-aka">::')
-                #html_string = _reAKAStitles.sub(
-                #    r'<div class="_imdbpyAKA">\1::</div>\2', html_string)
             return html_string
         # Direct hit!
         dbme = self._BaseParser(useModule=self._useModule)
@@ -141,7 +137,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
         title = self._titleBuilder(res[0][1])
         if not (link and title): return u''
         link = link.replace('http://pro.imdb.com', '')
-        new_html = '<td></td><td></td><td><a href="%s">%s</a></td>' % (link,
+        new_html = '<td class="result_text"><a href="%s">%s</a></td>' % (link,
                                                                        title)
         return new_html
 
@@ -161,11 +157,14 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
             if not datum[0] and datum[1]:
                 continue
             if datum[2] is not None:
-                akas = filter(None, datum[2].split('::'))
+                #akas = filter(None, datum[2].split('::'))
                 if self._linkPrefix == '/title/tt':
-                    akas = [a.replace('" - ', '::').rstrip() for a in akas]
-                    akas = [a.replace('aka "', '', 1).replace('aka "',
-                        '', 1).lstrip() for a in akas]
+                    # XXX (HTU): couldn't find a result with multiple akas
+                    aka = datum[2]
+                    akas = [aka[1:-1]]  # remove the quotes
+                    #akas = [a.replace('" - ', '::').rstrip() for a in akas]
+                    #akas = [a.replace('aka "', '', 1).replace('aka "',
+                    #    '', 1).lstrip() for a in akas]
                 datum[1]['akas'] = akas
                 data['data'][idx] = (datum[0], datum[1])
             else:
@@ -7,7 +7,7 @@ for a given person.
 E.g., when searching for the name "Mel Gibson", the parsed page would be:
     http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
 
-Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -55,7 +55,7 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
     """Parse the html page that the IMDb web server shows when the
     "new search system" is used, for persons."""
     _BaseParser = DOMBasicPersonParser
-    _notDirectHitTitle = '<title>imdb name'
+    _notDirectHitTitle = '<title>find - imdb'
    _titleBuilder = lambda self, x: build_name(x, canonical=True)
    _linkPrefix = '/name/nm'
 
@@ -74,11 +74,11 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
                                canonical=1), x.get('akas')
                         ))]
     extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, '/name/nm')]/..",
+                            path="//td[@class='result_text']/a[starts-with(@href, '/name/nm')]/..",
                             attrs=_attrs)]
 
     def preprocess_string(self, html_string):
-        if self._notDirectHitTitle in html_string[:1024].lower():
+        if self._notDirectHitTitle in html_string[:10240].lower():
             html_string = _reAKASp.sub(
                             r'\1<div class="_imdbpyAKA">\2::</div>\3',
                             html_string)
@@ -340,7 +340,7 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
         title = title[:nidx].rstrip()
     if year:
         year = year.strip()
-        if title[-1] == ')':
+        if title[-1:] == ')':
             fpIdx = title.rfind('(')
             if fpIdx != -1:
                 if notes: notes = '%s %s' % (title[fpIdx:], notes)
@@ -6,7 +6,7 @@ IMDb's data for mobile systems.
 the imdb.IMDb function will return an instance of this class when
 called with the 'accessSystem' argument set to "mobile".
 
-Copyright 2005-2011 Davide Alberani <da@erlug.linux.it>
+Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -193,7 +193,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                                     title)
             return res
         tl = title[0].lower()
-        if not tl.startswith('imdb title'):
+        if not tl.startswith('find - imdb'):
             # a direct hit!
             title = _unHtml(title[0])
             mid = None
@@ -211,7 +211,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
             # XXX: this results*3 prevents some recursion errors, but...
             # it's not exactly understandable (i.e.: why 'results' is
             # not enough to get all the results?)
-            lis = _findBetween(cont, 'td valign="top">', '</td>',
+            lis = _findBetween(cont, 'td class="result_text">', '</td>',
                                maxRes=results*3)
             for li in lis:
                 akas = re_makas.findall(li)
@@ -492,7 +492,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
             self._mobile_logger.warn('no title tag searching for name %s', name)
             return res
         nl = name[0].lower()
-        if not nl.startswith('imdb name'):
+        if not nl.startswith('find - imdb'):
             # a direct hit!
             name = _unHtml(name[0])
             name = name.replace('- Filmography by type' , '').strip()
@@ -506,7 +506,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                 return res
             res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
         else:
-            lis = _findBetween(cont, 'td valign="top">', '</td>',
+            lis = _findBetween(cont, 'td class="result_text">', '</td>',
                                maxRes=results*3)
             for li in lis:
                 akas = _findBetween(li, '<em>"', '"</em>')
@@ -771,7 +771,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
         return {'data': d}
 
     def _search_character(self, name, results):
-        cont = subXMLRefs(self._get_search_content('char', name, results))
+        cont = subXMLRefs(self._get_search_content('ch', name, results))
         name = _findBetween(cont, '<title>', '</title>', maxRes=1)
         res = []
         if not name:
@@ -779,8 +779,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                                      name)
             return res
         nl = name[0].lower()
-        if not (nl.startswith('imdb search') or nl.startswith('imdb search') \
-                or nl.startswith('imdb character')):
+        if not nl.startswith('find - imdb'):
             # a direct hit!
             name = _unHtml(name[0]).replace('(Character)', '').strip()
             pid = None
@@ -793,12 +792,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                 return res
             res[:] = [(str(pid[0]), analyze_name(name))]
         else:
-            sects = _findBetween(cont, '<b>Popular Characters</b>', '</table>',
-                                 maxRes=results*3)
-            sects += _findBetween(cont, '<b>Characters', '</table>',
-                                  maxRes=results*3)
-            for sect in sects:
-                lis = _findBetween(sect, '<a href="/character/',
+            lis = _findBetween(cont, '<td class="result_text"',
                                ['<small', '</td>', '<br'])
             for li in lis:
                 li = '<%s' % li
@@ -7,7 +7,7 @@ the SQLObject _AND_ SQLAlchemy Object Relational Managers is available.
 the imdb.IMDb function will return an instance of this class when
 called with the 'accessSystem' argument set to "sql", "database" or "db".
 
-Copyright 2005-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -452,7 +452,12 @@ def get_movie_data(movieID, kindDict, fromAka=0, _table=None):
     else:
         if not fromAka: Table = Title
         else: Table = AkaTitle
-    m = Table.get(movieID)
+    try:
+        m = Table.get(movieID)
+    except Exception, e:
+        _aux_logger.warn('Unable to fetch information for movieID %s: %s', movieID, e)
+        mdict = {}
+        return mdict
     mdict = {'title': m.title, 'kind': kindDict[m.kindID],
             'year': m.productionYear, 'imdbIndex': m.imdbIndex,
             'season': m.seasonNr, 'episode': m.episodeNr}
@@ -825,14 +830,14 @@ class IMDbSqlAccessSystem(IMDbBase):
         imdbID = movie.imdbID
         if imdbID is not None: return '%07d' % imdbID
         m_dict = get_movie_data(movie.id, self._kind)
-        titline = build_title(m_dict, ptdf=1)
-        imdbID = self.title2imdbID(titline)
+        titline = build_title(m_dict, ptdf=0)
+        imdbID = self.title2imdbID(titline, m_dict['kind'])
         # If the imdbID was retrieved from the web and was not in the
         # database, update the database (ignoring errors, because it's
         # possibile that the current user has not update privileges).
         # There're times when I think I'm a genius; this one of
         # those times... <g>
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
             try: movie.imdbID = int(imdbID)
             except: pass
         return imdbID
@@ -847,9 +852,9 @@ class IMDbSqlAccessSystem(IMDbBase):
         imdbID = person.imdbID
         if imdbID is not None: return '%07d' % imdbID
         n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
-        namline = build_name(n_dict, canonical=1)
+        namline = build_name(n_dict, canonical=False)
         imdbID = self.name2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
             try: person.imdbID = int(imdbID)
             except: pass
         return imdbID
@@ -864,9 +869,9 @@ class IMDbSqlAccessSystem(IMDbBase):
         imdbID = character.imdbID
         if imdbID is not None: return '%07d' % imdbID
         n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
-        namline = build_name(n_dict, canonical=1)
+        namline = build_name(n_dict, canonical=False)
         imdbID = self.character2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
             try: character.imdbID = int(imdbID)
             except: pass
         return imdbID
@@ -883,7 +888,7 @@ class IMDbSqlAccessSystem(IMDbBase):
         n_dict = {'name': company.name, 'country': company.countryCode}
         namline = build_company_name(n_dict)
         imdbID = self.company2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
             try: company.imdbID = int(imdbID)
             except: pass
         return imdbID
@@ -1116,6 +1121,7 @@ class IMDbSqlAccessSystem(IMDbBase):
         if mlinks:
             for ml in mlinks:
                 lmovieData = get_movie_data(ml[0], self._kind)
+                if lmovieData:
                     m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
                     ml[0] = m
         res['connections'] = {}
@@ -466,6 +466,7 @@ class _AlchemyConnection(object):
 
 def setConnection(uri, tables, encoding='utf8', debug=False):
     """Set connection for every table."""
+    params = {'encoding': encoding}
     # FIXME: why on earth MySQL requires an additional parameter,
     # is well beyond my understanding...
     if uri.startswith('mysql'):
@@ -474,7 +475,11 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
         else:
             uri += '?'
         uri += 'charset=%s' % encoding
-    params = {'encoding': encoding}
+
+    # On some server configurations, we will need to explictly enable
+    # loading data from local files
+    params['local_infile'] = 1
+
     if debug:
         params['echo'] = True
     if uri.startswith('ibm_db'):
Binary file not shown.
@@ -182,6 +182,10 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
         kw['use_unicode'] = 1
         #kw['sqlobject_encoding'] = encoding
         kw['charset'] = encoding
+
+        # On some server configurations, we will need to explictly enable
+        # loading data from local files
+        kw['local_infile'] = 1
     conn = connectionForURI(uri, **kw)
     conn.debug = debug
     # XXX: doesn't work and a work-around was put in imdbpy2sql.py;
@@ -3,7 +3,7 @@ utils module (imdb package).
 
 This module provides basic utilities for the imdb package.
 
-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
            2009 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
@@ -189,10 +189,9 @@ _unicodeArticles = linguistics.toUnicode(_articles)
 articlesDicts = linguistics.articlesDictsForLang(None)
 spArticles = linguistics.spArticlesForLang(None)
 
-def canonicalTitle(title, lang=None):
+def canonicalTitle(title, lang=None, imdbIndex=None):
     """Return the title in the canonic format 'Movie Title, The';
-    beware that it doesn't handle long imdb titles, but only the
-    title portion, without year[/imdbIndex] or special markup.
+    beware that it doesn't handle long imdb titles.
     The 'lang' argument can be used to specify the language of the title.
     """
     isUnicode = isinstance(title, unicode)
@@ -203,15 +202,19 @@ def canonicalTitle(title, lang=None):
     except IndexError:
         pass
     if isUnicode:
-        _format = u'%s, %s'
+        _format = u'%s%s, %s'
     else:
-        _format = '%s, %s'
+        _format = '%s%s, %s'
     ltitle = title.lower()
+    if imdbIndex:
+        imdbIndex = ' (%s)' % imdbIndex
+    else:
+        imdbIndex = ''
     spArticles = linguistics.spArticlesForLang(lang)
     for article in spArticles[isUnicode]:
         if ltitle.startswith(article):
             lart = len(article)
-            title = _format % (title[lart:], title[:lart])
+            title = _format % (title[lart:], imdbIndex, title[:lart])
             if article[-1] == ' ':
                 title = title[:-1]
             break
@@ -383,18 +386,42 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
     if title.endswith('(TV)'):
         kind = u'tv movie'
         title = title[:-4].rstrip()
+    elif title.endswith('(TV Movie)'):
+        kind = u'tv movie'
+        title = title[:-10].rstrip()
     elif title.endswith('(V)'):
         kind = u'video movie'
         title = title[:-3].rstrip()
-    elif title.endswith('(video)'):
+    elif title.lower().endswith('(video)'):
         kind = u'video movie'
         title = title[:-7].rstrip()
+    elif title.endswith('(TV Short)'):
+        kind = u'tv short'
+        title = title[:-10].rstrip()
+    elif title.endswith('(TV Mini-Series)'):
+        kind = u'tv mini series'
+        title = title[:-16].rstrip()
     elif title.endswith('(mini)'):
         kind = u'tv mini series'
         title = title[:-6].rstrip()
     elif title.endswith('(VG)'):
         kind = u'video game'
         title = title[:-4].rstrip()
+    elif title.endswith('(Video Game)'):
+        kind = u'video game'
+        title = title[:-12].rstrip()
+    elif title.endswith('(TV Series)'):
+        epindex = title.find('(TV Episode) - ')
+        if epindex >= 0:
+            # It's an episode of a series.
+            kind = u'episode'
+            series_info = analyze_title(title[epindex + 15:])
+            result['episode of'] = series_info.get('title')
+            result['series year'] = series_info.get('year')
+            title = title[:epindex]
+        else:
+            kind = u'tv series'
+            title = title[:-11].rstrip()
     # Search for the year and the optional imdbIndex (a roman number).
     yi = re_year_index.findall(title)
     if not yi:
@@ -430,9 +457,6 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
         if not kind:
             kind = u'tv series'
         title = title[1:-1].strip()
-    elif title.endswith('(TV series)'):
-        kind = u'tv series'
-        title = title[:-11].rstrip()
     if not title:
         raise IMDbParserError('invalid title: "%s"' % original_t)
     if canonical is not None:
@@ -489,7 +513,7 @@ def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''):
 
 def build_title(title_dict, canonical=None, canonicalSeries=None,
                 canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
-                _emptyString=u''):
+                _emptyString=u'', appendKind=True):
     """Given a dictionary that represents a "long" IMDb title,
     return a string.
 
@@ -511,6 +535,11 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
             doYear = 0
         if ptdf:
             doYear = 1
+        # XXX: for results coming from the new search page.
+        if not isinstance(episode_of, (dict, _Container)):
+            episode_of = {'title': episode_of, 'kind': 'tv series'}
+        if 'series year' in title_dict:
+            episode_of['year'] = title_dict['series year']
         pre_title = build_title(episode_of, canonical=canonicalSeries,
                                 ptdf=0, _doYear=doYear,
                                 _emptyString=_emptyString)
@@ -545,12 +574,14 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
             episode_title += '.%s' % episode
         episode_title += ')'
         episode_title = '{%s}' % episode_title
-        return '%s %s' % (pre_title, episode_title)
+        return _emptyString + '%s %s' % (_emptyString + pre_title,
+                                         _emptyString + episode_title)
     title = title_dict.get('title', '')
+    imdbIndex = title_dict.get('imdbIndex', '')
     if not title: return _emptyString
     if canonical is not None:
         if canonical:
-            title = canonicalTitle(title, lang=lang)
+            title = canonicalTitle(title, lang=lang, imdbIndex=imdbIndex)
         else:
             title = normalizeTitle(title, lang=lang)
     if pre_title:
@@ -558,15 +589,20 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
     if kind in (u'tv series', u'tv mini series'):
         title = '"%s"' % title
     if _doYear:
-        imdbIndex = title_dict.get('imdbIndex')
-        year = title_dict.get('year') or u'????'
+        year = title_dict.get('year') or '????'
         if isinstance(_emptyString, str):
             year = str(year)
+        imdbIndex = title_dict.get('imdbIndex')
+        if not ptdf:
+            if imdbIndex and (canonical is None or canonical):
+                title += ' (%s)' % imdbIndex
+            title += ' (%s)' % year
+        else:
             title += ' (%s' % year
-            if imdbIndex:
+            if imdbIndex and (canonical is None or canonical):
                 title += '/%s' % imdbIndex
             title += ')'
-    if kind:
+    if appendKind and kind:
         if kind == 'tv movie':
             title += ' (TV)'
         elif kind == 'video movie':
@@ -11,6 +11,7 @@ __author__ = "dbr/Ben"
 __version__ = "1.9"
 
 import os
+import re
 import time
 import getpass
 import StringIO
@@ -18,8 +19,10 @@ import tempfile
 import warnings
 import logging
 import zipfile
+import datetime as dt
 import requests
 import cachecontrol
+import xmltodict
 
 try:
     import xml.etree.cElementTree as ElementTree
@@ -31,6 +34,7 @@ try:
 except ImportError:
     gzip = None
 
+from lib.dateutil.parser import parse
 from cachecontrol import caches
 
 from tvdb_ui import BaseUI, ConsoleUI
@@ -560,35 +564,60 @@ class Tvdb:
         except requests.Timeout, e:
             raise tvdb_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
 
-        if 'application/zip' in resp.headers.get("Content-Type", '') and resp.ok:
+        def process(path, key, value):
+            key = key.lower()
+
+            # clean up value and do type changes
+            if value:
+                try:
+                    # convert to integer if needed
+                    if value.isdigit():
+                        value = int(value)
+                except:
+                    pass
+
+                if key in ['banner', 'fanart', 'poster']:
+                    value = self.config['url_artworkPrefix'] % (value)
+                else:
+                    value = self._cleanData(value)
+
+                try:
+                    if key == 'firstaired' and value in "0000-00-00":
+                        new_value = str(dt.date.fromordinal(1))
+                        new_value = re.sub("([-]0{2}){1,}", "", new_value)
+                        fixDate = parse(new_value, fuzzy=True).date()
+                        value = fixDate.strftime("%Y-%m-%d")
+                    elif key == 'firstaired':
+                        value = parse(value, fuzzy=True).date()
+                        value = value.strftime("%Y-%m-%d")
+                except:
+                    pass
+
+                value = self._cleanData(value)
+            return (key, value)
+
+        if resp.ok:
+            if 'application/zip' in resp.headers.get("Content-Type", ''):
                 try:
                     # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
                     log().debug("We recived a zip file unpacking now ...")
                     zipdata = StringIO.StringIO()
                     zipdata.write(resp.content)
                     myzipfile = zipfile.ZipFile(zipdata)
-                    return myzipfile.read('%s.xml' % language)
+                    return xmltodict.parse(myzipfile.read('%s.xml' % language), postprocessor=process)
                 except zipfile.BadZipfile:
                     raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
 
-        return resp.content if resp.ok else None
+            else:
+                return xmltodict.parse(resp.text.strip(), postprocessor=process)
 
     def _getetsrc(self, url, params=None, language=None):
         """Loads a URL using caching, returns an ElementTree of the source
         """
         src = self._loadUrl(url, params=params, language=language)
         try:
-            # TVDB doesn't sanitize \r (CR) from user input in some fields,
-            # remove it to avoid errors. Change from SickBeard, from will14m
-            return ElementTree.fromstring(src.rstrip("\r")) if src else None
-        except SyntaxError:
-            src = self._loadUrl(url, params=params, language=language)
-            try:
-                return ElementTree.fromstring(src.rstrip("\r")) if src else None
-            except SyntaxError, exceptionmsg:
-                errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % (
-                    exceptionmsg
-                )
+            src = [src[item] for item in src][0]
+        except:
+            errormsg = "There was an error with the XML retrieved from thetvdb.com:"
 
             if self.config['cache_enabled']:
                 errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
@ -599,6 +628,8 @@ class Tvdb:
|
|||
errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
|
||||
raise tvdb_error(errormsg)
|
||||
|
||||
return src
|
||||
|
||||
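With _loadUrl now returning a parsed dict instead of raw XML, the `[src[item] for item in src][0]` idiom above simply unwraps the single root element. An illustrative sketch (sample data, not actual API output):

    src = {'data': {'series': {'id': 71663, 'seriesname': 'The Simpsons'}}}
    src = [src[item] for item in src][0]
    # src is now {'series': {'id': 71663, 'seriesname': 'The Simpsons'}}
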
    def _setItem(self, sid, seas, ep, attrib, value):
        """Creates a new episode, creating Show(), Season() and
        Episode()s as required. Called by _getShowData to populate show

@ -649,9 +680,8 @@ class Tvdb:
        log().debug("Searching for show %s" % series)
        self.config['params_getSeries']['seriesname'] = series
        seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
        allSeries = list(dict((s.tag.lower(), s.text) for s in x.getchildren()) for x in seriesEt)

        return allSeries
        return [seriesEt[item] for item in seriesEt][0]

    def _getSeries(self, series):
        """This searches TheTVDB.com for the series name,

@ -798,24 +828,13 @@ class Tvdb:
            self.config['url_seriesInfo'] % (sid, getShowInLanguage)
        )

        if seriesInfoEt is None: return False
        for curInfo in seriesInfoEt.findall("Series")[0]:
            tag = curInfo.tag.lower()
            value = curInfo.text

            if tag == 'seriesname' and value is None:
        # check and make sure we have data to process and that it contains a series name
        if seriesInfoEt is None or 'seriesname' not in seriesInfoEt['series']:
                return False

            if value is not None:
                if tag == 'id':
                    value = int(value)
        for k, v in seriesInfoEt['series'].items():
            self._setShowData(sid, k, v)

                if tag in ['banner', 'fanart', 'poster']:
                    value = self.config['url_artworkPrefix'] % (value)
                else:
                    value = self._cleanData(value)

                self._setShowData(sid, tag, value)
        if seriesSearch:
            return True

@ -837,63 +856,40 @@ class Tvdb:

        epsEt = self._getetsrc(url, language=language)

        for cur_ep in epsEt.findall("Episode"):

        for cur_ep in epsEt["episode"]:
            if self.config['dvdorder']:
                log().debug('Using DVD ordering.')
                use_dvd = cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None
                use_dvd = cur_ep['dvd_season'] != None and cur_ep['dvd_episodenumber'] != None
            else:
                use_dvd = False

            if use_dvd:
                elem_seasnum, elem_epno = cur_ep.find('DVD_season'), cur_ep.find('DVD_episodenumber')
                seasnum, epno = cur_ep['dvd_season'], cur_ep['dvd_episodenumber']
            else:
                elem_seasnum, elem_epno = cur_ep.find('SeasonNumber'), cur_ep.find('EpisodeNumber')

            if elem_seasnum is None or elem_epno is None:
                seasnum, epno = cur_ep['seasonnumber'], cur_ep['episodenumber']

            if seasnum is None or epno is None:
                log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
                    elem_seasnum, elem_epno))
                log().debug(
                    " ".join(
                        "%r is %r" % (child.tag, child.text) for child in cur_ep.getchildren()))
                # TODO: Should this happen?
                    seasnum, epno))
                continue # Skip to next episode


            # float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
            seas_no = int(float(elem_seasnum.text))
            ep_no = int(float(elem_epno.text))
            seas_no = int(float(seasnum))
            ep_no = int(float(epno))

            useDVD = False
            for k,v in cur_ep.items():
                k = k.lower()

            if (self.config['dvdorder']):
                log().debug('DVD Order? Yes')
                useDVD = (cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None)
                if v is not None:
                    if k == 'id':
                        v = int(v)

                    if k == 'filename':
                        v = self.config['url_artworkPrefix'] % (v)
                    else:
                log().debug('DVD Order? No')
                        v = self._cleanData(v)

            if (useDVD):
                log().debug('Use DVD Order? Yes')
                seas_no = int(cur_ep.find('DVD_season').text)
                ep_no = int(float(cur_ep.find('DVD_episodenumber').text))
            else:
                log().debug('Use DVD Order? No')
                seas_no = int(cur_ep.find('SeasonNumber').text)
                ep_no = int(cur_ep.find('EpisodeNumber').text)

            for cur_item in cur_ep.getchildren():
                tag = cur_item.tag.lower()
                value = cur_item.text
                if value is not None:
                    if tag == 'id':
                        value = int(value)

                    if tag == 'filename':
                        value = self.config['url_artworkPrefix'] % (value)
                    else:
                        value = self._cleanData(value)
                    self._setItem(sid, seas_no, ep_no, tag, value)
                    self._setItem(sid, seas_no, ep_no, k, v)

        return True

@ -24,6 +24,7 @@ import logging
import datetime as dt
import requests
import cachecontrol
import xmltodict

try:
    import xml.etree.cElementTree as ElementTree

@ -37,9 +38,11 @@ from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
                               tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)


def log():
    return logging.getLogger("tvrage_api")


def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

@ -83,6 +86,7 @@ def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):

    return deco_retry

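The decorator body is elided by the diff view; a minimal sketch of the usual exponential-backoff pattern behind this signature (an assumption, the committed implementation may differ in detail):

    import time
    from functools import wraps

    def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
        def deco_retry(f):
            @wraps(f)
            def f_retry(*args, **kwargs):
                mtries, mdelay = tries, delay
                while mtries > 1:
                    try:
                        return f(*args, **kwargs)
                    except ExceptionToCheck, e:
                        if logger:
                            logger.warning("%s, retrying in %d seconds..." % (str(e), mdelay))
                        time.sleep(mdelay)   # wait, then double (backoff=2) the delay
                        mtries -= 1
                        mdelay *= backoff
                return f(*args, **kwargs)
            return f_retry
        return deco_retry
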
class ShowContainer(dict):
    """Simple dict that holds a series of Show instances
    """

@ -112,6 +116,7 @@ class ShowContainer(dict):
class Show(dict):
    """Holds a dict of seasons, and show data.
    """

    def __init__(self):
        dict.__init__(self)
        self.data = {}

@ -261,8 +266,10 @@ class Episode(dict):
            if cur_value.find(unicode(term).lower()) > -1:
                return self


class TVRage:
    """Create easy-to-use interface to name of season/episode name"""

    def __init__(self,
                 interactive=False,
                 select_first=False,

@ -390,9 +397,9 @@ class TVRage:

            # get response from TVRage
            if self.config['cache_enabled']:
                resp = self.sess.get(url, cache_auto=True, params=params)
                resp = self.sess.get(url.strip(), cache_auto=True, params=params)
            else:
                resp = requests.get(url, params=params)
                resp = requests.get(url.strip(), params=params)

        except requests.HTTPError, e:
            raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))

@ -403,12 +410,8 @@ class TVRage:
        except requests.Timeout, e:
            raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))

        return resp.content if resp.ok else None

    def _getetsrc(self, url, params=None):
        """Loads a URL using caching, returns an ElementTree of the source
        """
        reDict = {
        def remap_keys(path, key, value):
            name_map = {
                'showid': 'id',
                'showname': 'seriesname',
                'name': 'seriesname',

@ -422,54 +425,59 @@ class TVRage:
                'title': 'episodename',
                'airdate': 'firstaired',
                'screencap': 'filename',
                'seasonnum': 'episodenumber',
                'seasonnum': 'episodenumber'
            }

        robj = re.compile('|'.join(reDict.keys()))
        src = self._loadUrl(url, params)
        try:
            # TVRAGE doesn't sanitize \r (CR) from user input in some fields,
            # remove it to avoid errors. Change from SickBeard, from will14m
            xml = ElementTree.fromstring(src.rstrip("\r"))
            tree = ElementTree.ElementTree(xml)
            for elm in tree.findall('.//*'):
                elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
                key = name_map[key.lower()]
            except (ValueError, TypeError, KeyError):
                key = key.lower()

            # clean up value and do type changes
            if value:
                if isinstance(value, dict):
                    if key == 'network':
                        value = value['#text']
                    if key == 'genre':
                        value = value['genre']
                        if not isinstance(value, list):
                            value = [value]
                        value = '|' + '|'.join(value) + '|'

                if elm.tag in 'firstaired':
                    try:
                        if elm.text in "0000-00-00":
                            elm.text = str(dt.date.fromordinal(1))
                            elm.text = re.sub("([-]0{2}){1,}", "", elm.text)
                            fixDate = parse(elm.text, fuzzy=True).date()
                            elm.text = fixDate.strftime("%Y-%m-%d")
                try:
                    # convert to integer if needed
                    if value.isdigit():
                        value = int(value)
                except:
                    pass
            return ElementTree.fromstring(ElementTree.tostring(xml))
        except SyntaxError:
            src = self._loadUrl(url, params)
            try:
                xml = ElementTree.fromstring(src.rstrip("\r"))
                tree = ElementTree.ElementTree(xml)
                for elm in tree.findall('.//*'):
                    elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)

                if elm.tag in 'firstaired' and elm.text:
                    if elm.text == "0000-00-00":
                        elm.text = str(dt.date.fromordinal(1))
                    try:
                        #month = strptime(match.group('air_month')[:3],'%b').tm_mon
                        #day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
                        #dtStr = '%s/%s/%s' % (year, month, day)

                        fixDate = parse(elm.text, fuzzy=True)
                        elm.text = fixDate.strftime("%Y-%m-%d")
                    if key == 'firstaired' and value in "0000-00-00":
                        new_value = str(dt.date.fromordinal(1))
                        new_value = re.sub("([-]0{2}){1,}", "", new_value)
                        fixDate = parse(new_value, fuzzy=True).date()
                        value = fixDate.strftime("%Y-%m-%d")
                    elif key == 'firstaired':
                        value = parse(value, fuzzy=True).date()
                        value = value.strftime("%Y-%m-%d")
                except:
                    pass
                return ElementTree.fromstring(ElementTree.tostring(xml))
            except SyntaxError, exceptionmsg:
                errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % (
                    exceptionmsg
                )

                value = self._cleanData(value)
            return (key, value)

        if resp.ok:
            return xmltodict.parse(resp.text.strip(), postprocessor=remap_keys)

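The genre branch above flattens the parsed `{'genre': [...]}` structure into the pipe-delimited string the rest of SickRage expects; step by step with sample values:

    value = {'genre': ['Action', 'Drama']}   # as parsed from <genres><genre>...</genre></genres>
    value = value['genre']
    if not isinstance(value, list):
        value = [value]
    value = '|' + '|'.join(value) + '|'
    # value is now '|Action|Drama|'
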
    def _getetsrc(self, url, params=None):
        """Loads a URL using caching, returns an ElementTree of the source
        """

        try:
            src = self._loadUrl(url, params)
            src = [src[item] for item in src][0]
        except:
            errormsg = "There was an error with the XML retrieved from tvrage.com"

            if self.config['cache_enabled']:
                errormsg += "\nFirst try emptying the cache folder at..\n%s" % (

@ -479,6 +487,8 @@ class TVRage:
        errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
        raise tvrage_error(errormsg)

        return src

    def _setItem(self, sid, seas, ep, attrib, value):
        """Creates a new episode, creating Show(), Season() and
        Episode()s as required. Called by _getShowData to populate show

@ -529,9 +539,8 @@ class TVRage:
        log().debug("Searching for show %s" % series)
        self.config['params_getSeries']['show'] = series
        seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
        allSeries = list(dict((s.tag.lower(),s.text) for s in x.getchildren()) for x in seriesEt)

        return allSeries
        return [seriesEt[item] for item in seriesEt][0]

    def _getSeries(self, series):
        """This searches tvrage.com for the series name,

@ -568,60 +577,47 @@ class TVRage:
            self.config['params_seriesInfo']
        )

        if seriesInfoEt is None: return False
        for curInfo in seriesInfoEt:
            tag = curInfo.tag.lower()
            value = curInfo.text

            if tag == 'seriesname' and value is None:
        # check and make sure we have data to process and that it contains a series name
        if seriesInfoEt is None or 'seriesname' not in seriesInfoEt:
            return False

            if tag == 'id':
                value = int(value)
        for k, v in seriesInfoEt.items():
            self._setShowData(sid, k, v)

            if value is not None:
                value = self._cleanData(value)

            self._setShowData(sid, tag, value)
        if seriesSearch: return True

        try:
            # Parse genre data
            log().debug('Getting genres of %s' % (sid))
            for genre in seriesInfoEt.find('genres'):
                tag = genre.tag.lower()

                value = genre.text
                if value is not None:
                    value = self._cleanData(value)

                self._setShowData(sid, tag, value)
        except Exception:
            log().debug('No genres for %s' % (sid))
        # series search ends here
        if seriesSearch:
            return True

        # Parse episode data
        log().debug('Getting all episodes of %s' % (sid))

        self.config['params_epInfo']['sid'] = sid
        epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
        for cur_list in epsEt.findall("Episodelist"):
            for cur_seas in cur_list:
                try:
                    seas_no = int(cur_seas.attrib['no'])
                    for cur_ep in cur_seas:
                        ep_no = int(cur_ep.find('episodenumber').text)

        for season in epsEt['Episodelist']['Season']:
            episodes = season['episode']
            if not isinstance(episodes, list):
                episodes = [episodes]

            for episode in episodes:
                seas_no = int(season['@no'])
                ep_no = int(episode['episodenumber'])
                self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
                for cur_item in cur_ep:
                    tag = cur_item.tag.lower()

                    value = cur_item.text
                    if value is not None:
                        if tag == 'id':
                            value = int(value)
                for k, v in episode.items():
                    try:
                        k = k.lower()
                        if v is not None:
                            if k == 'link':
                                v = v.rsplit('/', 1)[1]
                                k = 'id'

                        value = self._cleanData(value)
                            if k == 'id':
                                v = int(v)

                        self._setItem(sid, seas_no, ep_no, tag, value)
                            v = self._cleanData(v)

                            self._setItem(sid, seas_no, ep_no, k, v)
                    except:
                        continue
        return True
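
The isinstance guard above is needed because xmltodict maps repeated sibling elements to a list but a lone child to a single value; a quick illustration:

    import xmltodict

    one = xmltodict.parse('<Season no="1"><episode>1</episode></Season>')
    many = xmltodict.parse('<Season no="1"><episode>1</episode><episode>2</episode></Season>')
    # one['Season']['episode'] is a single value, many['Season']['episode'] is a
    # list, hence the isinstance(episodes, list) check before iterating
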
@ -673,11 +669,13 @@ def main():
    grabs an episode name interactively.
    """
    import logging

    logging.basicConfig(level=logging.DEBUG)

    tvrage_instance = TVRage(cache=False)
    print tvrage_instance['Lost']['seriesname']
    print tvrage_instance['Lost'][1][4]['episodename']


if __name__ == '__main__':
    main()

@ -0,0 +1,359 @@
#!/usr/bin/env python
"Makes working with XML feel like you are working with JSON"

from xml.parsers import expat
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesImpl
try: # pragma no cover
    from cStringIO import StringIO
except ImportError: # pragma no cover
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
try: # pragma no cover
    from collections import OrderedDict
except ImportError: # pragma no cover
    try:
        from ordereddict import OrderedDict
    except ImportError:
        OrderedDict = dict

try: # pragma no cover
    _basestring = basestring
except NameError: # pragma no cover
    _basestring = str
try: # pragma no cover
    _unicode = unicode
except NameError: # pragma no cover
    _unicode = str

__author__ = 'Martin Blech'
__version__ = '0.9.0'
__license__ = 'MIT'

class ParsingInterrupted(Exception):
    pass


class _DictSAXHandler(object):
    def __init__(self,
                 item_depth=0,
                 item_callback=lambda *args: True,
                 xml_attribs=True,
                 attr_prefix='@',
                 cdata_key='#text',
                 force_cdata=False,
                 cdata_separator='',
                 postprocessor=None,
                 dict_constructor=OrderedDict,
                 strip_whitespace=True,
                 namespace_separator=':',
                 namespaces=None):
        self.path = []
        self.stack = []
        self.data = None
        self.item = None
        self.item_depth = item_depth
        self.xml_attribs = xml_attribs
        self.item_callback = item_callback
        self.attr_prefix = attr_prefix
        self.cdata_key = cdata_key
        self.force_cdata = force_cdata
        self.cdata_separator = cdata_separator
        self.postprocessor = postprocessor
        self.dict_constructor = dict_constructor
        self.strip_whitespace = strip_whitespace
        self.namespace_separator = namespace_separator
        self.namespaces = namespaces

    def _build_name(self, full_name):
        if not self.namespaces:
            return full_name
        i = full_name.rfind(self.namespace_separator)
        if i == -1:
            return full_name
        namespace, name = full_name[:i], full_name[i+1:]
        short_namespace = self.namespaces.get(namespace, namespace)
        if not short_namespace:
            return name
        else:
            return self.namespace_separator.join((short_namespace, name))

    def _attrs_to_dict(self, attrs):
        if isinstance(attrs, dict):
            return attrs
        return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))

    def startElement(self, full_name, attrs):
        name = self._build_name(full_name)
        attrs = self._attrs_to_dict(attrs)
        self.path.append((name, attrs or None))
        if len(self.path) > self.item_depth:
            self.stack.append((self.item, self.data))
            if self.xml_attribs:
                attrs = self.dict_constructor(
                    (self.attr_prefix+key, value)
                    for (key, value) in attrs.items())
            else:
                attrs = None
            self.item = attrs or None
            self.data = None

    def endElement(self, full_name):
        name = self._build_name(full_name)
        if len(self.path) == self.item_depth:
            item = self.item
            if item is None:
                item = self.data
            should_continue = self.item_callback(self.path, item)
            if not should_continue:
                raise ParsingInterrupted()
        if len(self.stack):
            item, data = self.item, self.data
            self.item, self.data = self.stack.pop()
            if self.strip_whitespace and data is not None:
                data = data.strip() or None
            if data and self.force_cdata and item is None:
                item = self.dict_constructor()
            if item is not None:
                if data:
                    self.push_data(item, self.cdata_key, data)
                self.item = self.push_data(self.item, name, item)
            else:
                self.item = self.push_data(self.item, name, data)
        else:
            self.item = self.data = None
        self.path.pop()

    def characters(self, data):
        if not self.data:
            self.data = data
        else:
            self.data += self.cdata_separator + data

    def push_data(self, item, key, data):
        if self.postprocessor is not None:
            result = self.postprocessor(self.path, key, data)
            if result is None:
                return item
            key, data = result
        if item is None:
            item = self.dict_constructor()
        try:
            value = item[key]
            if isinstance(value, list):
                value.append(data)
            else:
                item[key] = [value, data]
        except KeyError:
            item[key] = data
        return item

def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', **kwargs):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can either be a `string` or a file-like object.

    If `xml_attribs` is `True`, element attributes are put in the dictionary
    among regular child elements, using `@` as a prefix to avoid collisions. If
    set to `False`, they are just ignored.

    Simple example::

        >>> import xmltodict
        >>> doc = xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>
        ... \"\"\")
        >>> doc['a']['@prop']
        u'x'
        >>> doc['a']['b']
        [u'1', u'2']

    If `item_depth` is `0`, the function returns a dictionary for the root
    element (default behavior). Otherwise, it calls `item_callback` every time
    an item at the specified depth is found and returns `None` in the end
    (streaming mode).

    The callback function receives two parameters: the `path` from the document
    root to the item (name-attribs pairs), and the `item` (dict). If the
    callback's return value is false-ish, parsing will be stopped with the
    :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print 'path:%s item:%s' % (path, item)
        ...     return True
        ...
        >>> xmltodict.parse(\"\"\"
        ... <a prop="x">
        ...   <b>1</b>
        ...   <b>2</b>
        ... </a>\"\"\", item_depth=2, item_callback=handle)
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2

    The optional argument `postprocessor` is a function that takes `path`,
    `key` and `value` as positional arguments and returns a new `(key, value)`
    pair where both `key` and `value` may have changed. Usage example::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...                 postprocessor=postprocessor)
        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])

    You can pass an alternate version of `expat` (such as `defusedexpat`) by
    using the `expat` parameter. E.g:

        >>> import defusedexpat
        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
        OrderedDict([(u'a', u'hello')])

    """
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        if not encoding:
            encoding = 'utf-8'
        xml_input = xml_input.encode(encoding)
    if not process_namespaces:
        namespace_separator = None
    parser = expat.ParserCreate(
        encoding,
        namespace_separator
    )
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    parser.buffer_text = True
    try:
        parser.ParseFile(xml_input)
    except (TypeError, AttributeError):
        parser.Parse(xml_input, True)
    return handler.item


def _emit(key, value, content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t'):
    if preprocessor is not None:
        result = preprocessor(key, value)
        if result is None:
            return
        key, value = result
    if not isinstance(value, (list, tuple)):
        value = [value]
    if depth == 0 and len(value) > 1:
        raise ValueError('document with multiple roots')
    for v in value:
        if v is None:
            v = OrderedDict()
        elif not isinstance(v, dict):
            v = _unicode(v)
        if isinstance(v, _basestring):
            v = OrderedDict(((cdata_key, v),))
        cdata = None
        attrs = OrderedDict()
        children = []
        for ik, iv in v.items():
            if ik == cdata_key:
                cdata = iv
                continue
            if ik.startswith(attr_prefix):
                attrs[ik[len(attr_prefix):]] = iv
                continue
            children.append((ik, iv))
        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attrs))
        if pretty and children:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in children:
            _emit(child_key, child_value, content_handler,
                  attr_prefix, cdata_key, depth+1, preprocessor,
                  pretty, newl, indent)
        if cdata is not None:
            content_handler.characters(cdata)
        if pretty and children:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)


def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
            **kwargs):
    """Emit an XML document for the given `input_dict` (reverse of `parse`).

    The resulting XML document is returned as a string, but if `output` (a
    file-like object) is specified, it is written there instead.

    Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
    as XML node attributes, whereas keys equal to `cdata_key`
    (default=`'#text'`) are treated as character data.

    The `pretty` parameter (default=`False`) enables pretty-printing. In this
    mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
    can be customized with the `newl` and `indent` parameters.

    """
    ((key, value),) = input_dict.items()
    must_return = False
    if output is None:
        output = StringIO()
        must_return = True
    content_handler = XMLGenerator(output, encoding)
    if full_document:
        content_handler.startDocument()
    _emit(key, value, content_handler, **kwargs)
    if full_document:
        content_handler.endDocument()
    if must_return:
        value = output.getvalue()
        try: # pragma no cover
            value = value.decode(encoding)
        except AttributeError: # pragma no cover
            pass
        return value
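
A usage sketch for `unparse`, the reverse of `parse`; the exact string shown assumes the default encoding and the emitter above:

    >>> import xmltodict
    >>> xmltodict.unparse({'a': {'@prop': 'x', '#text': 'hello'}})
    u'<?xml version="1.0" encoding="utf-8"?>\n<a prop="x">hello</a>'
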
if __name__ == '__main__': # pragma: no cover
    import sys
    import marshal

    (item_depth,) = sys.argv[1:]
    item_depth = int(item_depth)

    def handle_item(path, item):
        marshal.dump((path, item), sys.stdout)
        return True

    try:
        root = parse(sys.stdin,
                     item_depth=item_depth,
                     item_callback=handle_item,
                     dict_constructor=dict)
        if item_depth == 0:
            handle_item([], root)
    except KeyboardInterrupt:
        pass

@ -782,15 +782,11 @@ class GenericMetadata():

        # Try and get posters and fanart from TMDB
        if image_url is None:
        for show_name in set(allPossibleShowNames(show_obj)):
            if image_type in ('poster', 'poster_thumb'):
                image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
            elif image_type == 'fanart':
                image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)

            if image_url:
                break

        if image_url:
            image_data = metadata_helpers.getShowImage(image_url, which)
            return image_data

@ -965,8 +961,6 @@ class GenericMetadata():
        return (indexer_id, name, indexer)

    def _retrieve_show_images_from_tmdb(self, show, backdrop=False, poster=False):
        tmdb_id = None

        # get TMDB configuration info
        tmdb = TMDB(sickbeard.TMDB_API_KEY)
        config = tmdb.Configuration()

@ -981,27 +975,14 @@ class GenericMetadata():

        try:
            search = tmdb.Search()
            for result in search.collection({'query': show.name}) + search.tv({'query': show.name}):
                tmdb_id = result['id']
                external_ids = tmdb.TV(tmdb_id).external_ids()
                if show.indexerid in [external_ids['tvdb_id'], external_ids['tvrage_id']]:
                    break
            for show_name in set(allPossibleShowNames(show)):
                for result in search.collection({'query': show_name})['results'] + search.tv({'query': show_name})['results']:
                    if backdrop and result['backdrop_path']:
                        return "{0}{1}{2}".format(base_url, max_size, result['backdrop_path'])
                    elif poster and result['poster_path']:
                        return "{0}{1}{2}".format(base_url, max_size, result['poster_path'])

            if tmdb_id:
                images = tmdb.Collections(tmdb_id).images()
                if len(images) > 0:
                    # get backdrop urls
                    if backdrop:
                        rel_path = images['backdrops'][0]['file_path']
                        url = "{0}{1}{2}".format(base_url, max_size, rel_path)
                        return url

                    # get poster urls
                    if poster:
                        rel_path = images['posters'][0]['file_path']
                        url = "{0}{1}{2}".format(base_url, max_size, rel_path)
                        return url
        except:
        except Exception, e:
            pass

        logger.log(u"Could not find any posters or background for " + show.name, logger.DEBUG)
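
For context, the returned URL concatenates the TMDB image base URL from tmdb.Configuration(), a size, and the artwork's relative path; with illustrative (assumed) values:

    base_url = 'http://image.tmdb.org/t/p/'    # from config['images']['base_url'] (assumed value)
    max_size = 'original'                      # largest available size (assumed)
    backdrop_path = '/example_backdrop.jpg'    # hypothetical result['backdrop_path']
    url = "{0}{1}{2}".format(base_url, max_size, backdrop_path)
    # -> http://image.tmdb.org/t/p/original/example_backdrop.jpg
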
@ -829,7 +829,7 @@ class TVShow(object):
            self.airs = myEp["airs_dayofweek"] + " " + myEp["airs_time"]

        if getattr(myEp, 'firstaired', None) is not None:
            self.startyear = int(myEp["firstaired"].split('-')[0])
            self.startyear = int(str(myEp["firstaired"]).split('-')[0])

        self.status = getattr(myEp, 'status', '')

@ -855,7 +855,6 @@ class TVShow(object):
        i = imdb.IMDb()
        imdbTv = i.get_movie(str(re.sub("[^0-9]", "", self.imdbid)))

        test = imdbTv.keys()
        for key in filter(lambda x: x.replace('_', ' ') in imdbTv.keys(), imdb_info.keys()):
            # Store only the first value for string type
            if type(imdb_info[key]) == type('') and type(imdbTv.get(key)) == type([]):

@ -1556,7 +1555,7 @@ class TVEpisode(object):
                self.deleteEpisode()
            return False

        if myEp["absolute_number"] == None or myEp["absolute_number"] == "":
        if getattr(myEp, 'absolute_number', None) is None:
            logger.log(u"This episode (" + self.show.name + " - " + str(season) + "x" + str(
                episode) + ") has no absolute number on " + sickbeard.indexerApi(
                self.indexer).name

@ -1564,7 +1563,7 @@ class TVEpisode(object):
        else:
            logger.log(
                str(self.show.indexerid) + ": The absolute_number for " + str(season) + "x" + str(episode) + " is : " +
                myEp["absolute_number"], logger.DEBUG)
                str(myEp["absolute_number"]), logger.DEBUG)
            self.absolute_number = int(myEp["absolute_number"])

        self.name = getattr(myEp, 'episodename', "")

@ -1603,6 +1602,7 @@ class TVEpisode(object):
                u"The show dir is missing, not bothering to change the episode statuses since it'd probably be invalid")
            return

        if self.location:
            logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
                episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)