Update imdbpy libs to v5.0

Fixed invalid indexer id issues for TVRage shows. Fixed issues with getting posters and backdrops for TVRage shows. We now convert XML straight to a dict object for the Indexer APIs, which improves overall API performance. Fixed issues with TVRage shows not displaying genres properly.
2025-03-03 01:52:02 -05:00 · 2014-05-28 22:40:12 -07:00 · 2014-05-28 22:40:12 -07:00 · 2dcd26e69c
commit 2dcd26e69c
parent 764cf6e62e
30 changed files with 7446 additions and 453 deletions
--- a/lib/imdb/init.py
+++ b/lib/imdb/init.py
@ -6,7 +6,7 @@ a person from the IMDb database.
 It can fetch data through different media (e.g.: the IMDb web pages,
 a SQL database, etc.)

-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2014 Davide Alberani <da@erlug.linux.it>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 __all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
            'available_access_systems']
-__version__ = VERSION = '4.9'
+__version__ = VERSION = '5.0'

 # Import compatibility module (importing it is enough).
 import _compat
@ -160,6 +160,7 @@ def IMDb(accessSystem=None, *arguments, **keywords):
            kwds.update(keywords)
            keywords = kwds
        except Exception, e:
+            import logging
            logging.getLogger('imdbpy').warn('Unable to read configuration' \
                                            ' file; complete error: %s' % e)
            # It just LOOKS LIKE a bad habit: we tried to read config
@ -303,7 +304,7 @@ class IMDbBase:
        # http://akas.imdb.com/keyword/%s/
        imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
        # http://akas.imdb.com/chart/top
-        imdbURL_top250=imdbURL_base + 'chart/top',
+        imdbURL_top250=imdbURL_base + 'chart/top'
        # http://akas.imdb.com/chart/bottom
        imdbURL_bottom100=imdbURL_base + 'chart/bottom'
        # http://akas.imdb.com/find?%s
@ -824,22 +825,23 @@ class IMDbBase:
        #      subclass, somewhere under the imdb.parser package.
        raise NotImplementedError('override this method')

-    def _searchIMDb(self, kind, ton):
+    def _searchIMDb(self, kind, ton, title_kind=None):
        """Search the IMDb akas server for the given title or name."""
        # The Exact Primary search system has gone AWOL, so we resort
        # to the mobile search. :-/
        if not ton:
            return None
+        ton = ton.strip('"')
        aSystem = IMDb('mobile')
        if kind == 'tt':
            searchFunct = aSystem.search_movie
-            check = 'long imdb canonical title'
+            check = 'long imdb title'
        elif kind == 'nm':
            searchFunct = aSystem.search_person
-            check = 'long imdb canonical name'
+            check = 'long imdb name'
        elif kind == 'char':
            searchFunct = aSystem.search_character
-            check = 'long imdb canonical name'
+            check = 'long imdb name'
        elif kind == 'co':
            # XXX: are [COUNTRY] codes included in the results?
            searchFunct = aSystem.search_company
@ -852,24 +854,42 @@ class IMDbBase:
        # exact match.
        if len(searchRes) == 1:
            return searchRes[0].getID()
+        title_only_matches = []
        for item in searchRes:
            # Return the first perfect match.
-            if item[check] == ton:
-                return item.getID()
+            if item[check].strip('"') == ton:
+                # For titles do additional check for kind
+                if kind != 'tt' or title_kind == item['kind']:
+                    return item.getID()
+                elif kind == 'tt':
+                    title_only_matches.append(item.getID())
+        # imdbpy2sql.py could have detected the wrong type, so if no result
+        # matches both title and kind, collect all results matching the title only.
+        # Return a list of IDs if there are multiple matches (this can happen
+        # when searching titles with no title_kind specified).
+        # Example: DB: Band of Brothers "tv series" vs "tv mini-series"
+        if title_only_matches:
+            if len(title_only_matches) == 1:
+                return title_only_matches[0]
+            else:
+                return title_only_matches
        return None

-    def title2imdbID(self, title):
+    def title2imdbID(self, title, kind=None):
        """Translate a movie title (in the plain text data files format)
        to an imdbID.
        Try an Exact Primary Title search on IMDb;
-        return None if it's unable to get the imdbID."""
-        return self._searchIMDb('tt', title)
+        return None if it's unable to get the imdbID.
+        Always specify kind (movie, tv series, video game, etc.); otherwise the
+        search can return a list of IDs if multiple matches are found.
+        """
+        return self._searchIMDb('tt', title, kind)

    def name2imdbID(self, name):
        """Translate a person name in an imdbID.
        Try an Exact Primary Name search on IMDb;
        return None if it's unable to get the imdbID."""
-        return self._searchIMDb('tt', name)
+        return self._searchIMDb('nm', name)

    def character2imdbID(self, name):
        """Translate a character name in an imdbID.
@ -896,7 +916,8 @@ class IMDbBase:
                imdbID = aSystem.get_imdbMovieID(mop.movieID)
            else:
                imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
-                                                ptdf=1))
+                                                ptdf=0, appendKind=False),
+                                                mop['kind'])
        elif isinstance(mop, Person.Person):
            if mop.personID is not None:
                imdbID = aSystem.get_imdbPersonID(mop.personID)
--- a/lib/imdb/imdbpy.cfg
+++ b/lib/imdb/imdbpy.cfg
@ -29,7 +29,7 @@

 [imdbpy]
 ## Default.
-accessSystem = mobile
+accessSystem = http

 ## Optional (options common to every data access system):
 # Activate adult searches (on, by default).
@ -37,7 +37,7 @@ accessSystem = mobile
 # Number of results for searches (20 by default).
 #results = 20
 # Re-raise all caught exceptions (off, by default).
-reraiseExceptions = on
+#reraiseExceptions = off

 ## Optional (options common to http and mobile data access systems):
 # Proxy used to access the network.  If it requires authentication,
@ -69,7 +69,7 @@ reraiseExceptions = on
 ## Set the threshold for logging messages.
 # Can be one of "debug", "info", "warning", "error", "critical" (default:
 # "warning").
-loggingLevel = info
+#loggingLevel = debug

 ## Path to a configuration file for the logging facility;
 # see: http://docs.python.org/library/logging.html#configuring-logging
--- a/lib/imdb/linguistics.py
+++ b/lib/imdb/linguistics.py
@ -64,8 +64,10 @@ LANG_ARTICLES = {
    'English': ('the', 'a', 'an'),
    'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
                'uno'),
-    'Spanish': ('la', 'le', 'el', 'les', 'un', 'los', 'una', 'uno', 'unos',
-                'unas'),
+    'Spanish': ('la', 'lo', 'el', 'las', 'un', 'los', 'una', 'al', 'del',
+                'unos', 'unas', 'uno'),
+    'French': ('le', "l'", 'la', 'les', 'un', 'une', 'des', 'au', 'du', '\xc3\xa0 la',
+                'de la', 'aux'),
    'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
    'Turkish': (), # Some languages doesn't have articles.
 }
--- a/lib/imdb/locale/generatepot.py
+++ b/lib/imdb/locale/generatepot.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 """
 generatepot.py script.

--- a/lib/imdb/locale/imdbpy-ar.po
+++ b/lib/imdb/locale/imdbpy-ar.po
--- a/lib/imdb/locale/imdbpy-bg.po
+++ b/lib/imdb/locale/imdbpy-bg.po
--- a/lib/imdb/locale/imdbpy-de.po
+++ b/lib/imdb/locale/imdbpy-de.po
--- a/lib/imdb/locale/imdbpy-es.po
+++ b/lib/imdb/locale/imdbpy-es.po
--- a/lib/imdb/locale/imdbpy-fr.po
+++ b/lib/imdb/locale/imdbpy-fr.po
--- a/lib/imdb/locale/msgfmt.py
+++ b/lib/imdb/locale/msgfmt.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 # -*- coding: iso-8859-1 -*-
 """Generate binary message catalog from textual translation description.

--- a/lib/imdb/locale/rebuildmo.py
+++ b/lib/imdb/locale/rebuildmo.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 """
 rebuildmo.py script.

--- a/lib/imdb/parser/http/init.py
+++ b/lib/imdb/parser/http/init.py
@ -104,15 +104,24 @@ PY_VERSION = sys.version_info[:2]
 # The cookies for the "adult" search.
 # Please don't mess with these account.
 # Old 'IMDbPY' account.
-_old_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
-_old_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
-# New 'IMDbPYweb' account.
-_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
-_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
+_IMDbPY_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
+_IMDbPY_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
+# 'imdbpy2010' account.
+_imdbpy2010_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
+_imdbpy2010_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
+# old 'IMDbPYweb' account.
+_old_IMDbPYweb_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
+_old_IMDbPYweb_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
+# old 'IMDbPYweb' account values (as of 2012-12-30)
+_IMDbPYweb_cookie_id = 'BCYjtpb46Go0cMHAMewWZEauhwqPL7ASCPpPVNutu6BuayHZd0U6Dk3UAqVlEM8DHLDsSr02RGQn5ff3245-R4A130NAWJ_5yqXx7X-zJey8vQM8JKdv3rTUSEJznJQlojUW1Bije-Q0FXAixs4I0sePWhd_tA41i-9AF2q3lPmaksram6ilMhN9i3IPESW1PMbk'
+_IMDbPYweb_cookie_uu = 'BCYttQjEMc-NyUdFUGxThidAnBo7wwalEzj4un9uzf2XoEjtqDhNfrH7bOSuwlRkMEQ11SNyTajl-b9Q-21m4HwYu0e3jXZrjYLXLYzFkrEroCDyUREqaTwPJPSjGtFmvlaVBZEZmsWpaxe18DT5KiygKyGPZKH78Xu4im6ba-Sd31WvbXHzP8KGXPpGjhhVuv7Dcv314HCWkE832Srf9ya-Uv0FdGAmYyLbIAXuxnvpYQd6oZ8-CYkSGLIqcKWdrf5S'
+# 'IMDbPY2013' account
+_IMDbPY2013_cookie_id = 'BCYmoyqSm2WglmOzG-SrFWSvVpxsTZOB0qEOOqmAwCBxCbaNgKOxd0DTKzUvt7t04Pya5gV2tUrpDmYxrc1Dr54DQj2UXI7QI35__M5-HI2KrbOI3PjDz6M-_U3HG8topMfN64R24tmBixoZhMYXVaEc556lf0Z4gQNJVYRANXvwytP5v1lpfeToRlu9aVJwN4kT'
+_IMDbPY2013_cookie_uu = 'BCYquDS8Y2i8R1pJxS4nB77YrhjHHXeOea2Xl9KtZvE6RZKVfMvzTGU4Vl5-yxfPbgRSiFJasyf-hhPuVvXyaHlfeBjNlbFT8hz2HzFFkQ_SxKxq05J51gi7Fv4SaAws1M-i7zmQ1TRunfJqCVIYqPwIs2NO7s4_YDH2ZoISVGLgca8OY2K58HychOZB1oRWHVeAJNhLJMrCWJBuGRLCNnQK5X9tA0dPPntr2Ussy0ouul-N1GQz-8y5vda3JJ_C6xkwmHcA6JrOdOFO_HqMWjVSXuxGEdrXC919JM9H0vooVvKeVgAEJnTh2GiVlUJUoH3c'

-# imdbpy2010 account.
-#_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
-#_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
+# Currently used account.
+_cookie_id = _IMDbPY2013_cookie_id
+_cookie_uu = _IMDbPY2013_cookie_uu


 class _FakeURLOpener(object):
@ -141,9 +150,10 @@ class IMDbURLopener(FancyURLopener):
        for header in ('User-Agent', 'User-agent', 'user-agent'):
            self.del_header(header)
        self.set_header('User-Agent', 'Mozilla/5.0')
+        self.set_header('Accept-Language', 'en-us,en;q=0.5')
        # XXX: This class is used also to perform "Exact Primary
        #      [Title|Name]" searches, and so by default the cookie is set.
-        c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu)
+        c_header = 'uu=%s; id=%s' % (_cookie_uu, _cookie_id)
        self.set_header('Cookie', c_header)

    def get_proxy(self):
@ -199,12 +209,11 @@ class IMDbURLopener(FancyURLopener):
            server_encode = uopener.info().getparam('charset')
            # Otherwise, look at the content-type HTML meta tag.
            if server_encode is None and content:
-                first_bytes = content[:512]
-                begin_h = first_bytes.find('text/html; charset=')
+                begin_h = content.find('text/html; charset=')
                if begin_h != -1:
-                    end_h = first_bytes[19+begin_h:].find('"')
+                    end_h = content[19+begin_h:].find('"')
                    if end_h != -1:
-                        server_encode = first_bytes[19+begin_h:19+begin_h+end_h]
+                        server_encode = content[19+begin_h:19+begin_h+end_h]
            if server_encode:
                try:
                    if lookup(server_encode):
@ -455,16 +464,16 @@ class IMDbHTTPAccessSystem(IMDbBase):
        results is the maximum number of results to be retrieved."""
        if isinstance(ton, unicode):
            try:
-                ton = ton.encode('iso8859-1')
+                ton = ton.encode('utf-8')
            except Exception, e:
                try:
-                    ton = ton.encode('utf-8')
+                    ton = ton.encode('iso8859-1')
                except Exception, e:
                    pass
        ##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
-        params = 'q=%s;s=%s;mx=%s' % (quote_plus(ton), kind, str(results))
+        params = 'q=%s&s=%s&mx=%s' % (quote_plus(ton), kind, str(results))
        if kind == 'ep':
-            params = params.replace('s=ep;', 's=tt;ttype=ep;', 1)
+            params = params.replace('s=ep&', 's=tt&ttype=ep&', 1)
        cont = self._retrieve(self.urls['find'] % params)
        #print 'URL:', imdbURL_find % params
        if cont.find('Your search returned more than') == -1 or \
@ -472,7 +481,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
            return cont
        # The retrieved page contains no results, because too many
        # titles or names contain the string we're looking for.
-        params = 'q=%s;ls=%s;lm=0' % (quote_plus(ton), kind)
+        params = 'q=%s&ls=%s&lm=0' % (quote_plus(ton), kind)
        size = 131072 + results * 512
        return self._retrieve(self.urls['find'] % params, size=size)

@ -587,6 +596,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
        cont = self._retrieve(self.urls['movie_main'] % movieID + 'recommendations')
        return self.mProxy.rec_parser.parse(cont)

+    def get_movie_critic_reviews(self, movieID):
+        cont = self._retrieve(self.urls['movie_main'] % movieID + 'criticreviews')
+        return self.mProxy.criticrev_parser.parse(cont)
+
    def get_movie_external_reviews(self, movieID):
        cont = self._retrieve(self.urls['movie_main'] % movieID + 'externalreviews')
        return self.mProxy.externalrev_parser.parse(cont)
@ -754,7 +767,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
        return self.pProxy.person_keywords_parser.parse(cont)

    def _search_character(self, name, results):
-        cont = self._get_search_content('char', name, results)
+        cont = self._get_search_content('ch', name, results)
        return self.scProxy.search_character_parser.parse(cont, results=results)['data']

    def get_character_main(self, characterID):
--- a/lib/imdb/parser/http/movieParser.py
+++ b/lib/imdb/parser/http/movieParser.py
@ -9,7 +9,7 @@ pages would be:
    plot summary:       http://akas.imdb.com/title/tt0094226/plotsummary
    ...and so on...

-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
               2008 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
@ -531,9 +531,6 @@ class DOMHTMLMovieParser(DOMParserBase):
 def _process_plotsummary(x):
    """Process a plot (contributed by Rdian06)."""
    xauthor = x.get('author')
-    if xauthor:
-        xauthor = xauthor.replace('{', '<').replace('}', '>').replace('(',
-                                    '<').replace(')', '>').strip()
    xplot = x.get('plot', u'').strip()
    if xauthor:
        xplot += u'::%s' % xauthor
@ -555,17 +552,20 @@ class DOMHTMLPlotParser(DOMParserBase):
    # Notice that recently IMDb started to put the email of the
    # author only in the link, that we're not collecting, here.
    extractors = [Extractor(label='plot',
-                    path="//p[@class='plotpar']",
-                    attrs=Attribute(key='plot',
-                            multi=True,
-                            path={'plot': './text()',
-                                'author': './i/a/text()'},
-                            postprocess=_process_plotsummary))]
+                            path="//ul[@class='zebraList']//p",
+                            attrs=Attribute(key='plot',
+                                            multi=True,
+                                            path={'plot': './text()[1]',
+                                                  'author': './span/em/a/text()'},
+                                            postprocess=_process_plotsummary))]


 def _process_award(x):
    award = {}
-    award['award'] = x.get('award').strip()
+    _award = x.get('award')
+    if _award is not None:
+        _award = _award.strip()
+    award['award'] = _award
    if not award['award']:
        return {}
    award['year'] = x.get('year').strip()
@ -709,10 +709,16 @@ class DOMHTMLTaglinesParser(DOMParserBase):
        result = tparser.parse(taglines_html_string)
    """
    extractors = [Extractor(label='taglines',
-                            path="//div[@id='tn15content']/p",
-                            attrs=Attribute(key='taglines', multi=True,
+                            path='//*[contains(concat(" ", normalize-space(@class), " "), " soda ")]',
+                            attrs=Attribute(key='taglines',
+                                            multi=True,
                                            path="./text()"))]

+    def postprocess_data(self, data):
+        if 'taglines' in data:
+            data['taglines'] = [tagline.strip() for tagline in data['taglines']]
+        return data
+

 class DOMHTMLKeywordsParser(DOMParserBase):
    """Parser for the "keywords" page of a given movie.
@ -785,9 +791,9 @@ class DOMHTMLSoundtrackParser(DOMHTMLAlternateVersionsParser):
        ]

    def postprocess_data(self, data):
-        if 'soundtrack' in data:
+        if 'alternate versions' in data:
            nd = []
-            for x in data['soundtrack']:
+            for x in data['alternate versions']:
                ds = x.split('\n')
                title = ds[0]
                if title[0] == '"' and title[-1] == '"':
@ -846,6 +852,13 @@ class DOMHTMLCrazyCreditsParser(DOMParserBase):
                                    x.replace('\n', ' ').replace('  ', ' ')))]


+def _process_goof(x):
+    if x['spoiler_category']:
+        return x['spoiler_category'].strip() + ': SPOILER: ' + x['text'].strip()
+    else:
+        return x['category'].strip() + ': ' + x['text'].strip()
+
+
 class DOMHTMLGoofsParser(DOMParserBase):
    """Parser for the "goofs" page of a given movie.
    The page should be provided as a string, as taken from
@ -858,9 +871,14 @@ class DOMHTMLGoofsParser(DOMParserBase):
    """
    _defGetRefs = True

-    extractors = [Extractor(label='goofs', path="//ul[@class='trivia']/li",
-                    attrs=Attribute(key='goofs', multi=True, path=".//text()",
-                        postprocess=lambda x: (x or u'').strip()))]
+    extractors = [Extractor(label='goofs', path="//div[@class='soda odd']",
+                    attrs=Attribute(key='goofs', multi=True,
+                        path={
+                              'text':"./text()",
+                              'category':'./preceding-sibling::h4[1]/text()',
+                              'spoiler_category': './h4/text()'
+                        },
+                        postprocess=_process_goof))]


 class DOMHTMLQuotesParser(DOMParserBase):
@ -876,9 +894,16 @@ class DOMHTMLQuotesParser(DOMParserBase):
    _defGetRefs = True

    extractors = [
-        Extractor(label='quotes',
-            path="//div[@class='_imdbpy']",
-            attrs=Attribute(key='quotes',
+        Extractor(label='quotes_odd',
+            path="//div[@class='quote soda odd']",
+            attrs=Attribute(key='quotes_odd',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip().replace(' \n',
+                            '::').replace('::\n', '::').replace('\n', ' '))),
+        Extractor(label='quotes_even',
+            path="//div[@class='quote soda even']",
+            attrs=Attribute(key='quotes_even',
                multi=True,
                path=".//text()",
                postprocess=lambda x: x.strip().replace(' \n',
@ -886,27 +911,23 @@ class DOMHTMLQuotesParser(DOMParserBase):
        ]

    preprocessors = [
-        (re.compile('(<a name="?qt[0-9]{7}"?></a>)', re.I),
-            r'\1<div class="_imdbpy">'),
-        (re.compile('<hr width="30%">', re.I), '</div>'),
-        (re.compile('<hr/>', re.I), '</div>'),
-        (re.compile('<script.*?</script>', re.I|re.S), ''),
-        # For BeautifulSoup.
-        (re.compile('<!-- sid: t-channel : MIDDLE_CENTER -->', re.I), '</div>')
-        ]
+        (re.compile('<a href="#" class="hidesoda hidden">Hide options</a><br>', re.I), '')
+    ]

    def preprocess_dom(self, dom):
        # Remove "link this quote" links.
-        for qLink in self.xpath(dom, "//p[@class='linksoda']"):
+        for qLink in self.xpath(dom, "//span[@class='linksoda']"):
+            qLink.drop_tree()
+        for qLink in self.xpath(dom, "//div[@class='sharesoda_pre']"):
            qLink.drop_tree()
        return dom

    def postprocess_data(self, data):
-        if 'quotes' not in data:
+        quotes = data.get('quotes_odd', []) + data.get('quotes_even', [])
+        if not quotes:
            return {}
-        for idx, quote in enumerate(data['quotes']):
-            data['quotes'][idx] = quote.split('::')
-        return data
+        quotes = [q.split('::') for q in quotes]
+        return {'quotes': quotes}


 class DOMHTMLReleaseinfoParser(DOMParserBase):
@ -920,13 +941,13 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
        result = rdparser.parse(releaseinfo_html_string)
    """
    extractors = [Extractor(label='release dates',
-                    path="//th[@class='xxxx']/../../tr",
+                    path="//table[@id='release_dates']//tr",
                    attrs=Attribute(key='release dates', multi=True,
                        path={'country': ".//td[1]//text()",
                            'date': ".//td[2]//text()",
                            'notes': ".//td[3]//text()"})),
                Extractor(label='akas',
-                    path="//div[@class='_imdbpy_akas']/table/tr",
+                    path="//table[@id='akas']//tr",
                    attrs=Attribute(key='akas', multi=True,
                        path={'title': "./td[1]/text()",
                            'countries': "./td[2]/text()"}))]
@ -961,7 +982,7 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
            title = (aka.get('title') or '').strip()
            if not title:
                continue
-            countries = (aka.get('countries') or '').split('/')
+            countries = (aka.get('countries') or '').split(',')
            if not countries:
                nakas.append(title)
            else:
@ -1135,7 +1156,28 @@ def _normalize_href(href):
        href = '%s%s' % (imdbURL_base, href)
    return href

+class DOMHTMLCriticReviewsParser(DOMParserBase):
+    """Parser for the "critic reviews" pages of a given movie.
+    The page should be provided as a string, as taken from
+    the akas.imdb.com server.  The final result will be a
+    dictionary, with a key for every relevant section.

+    Example:
+        osparser = DOMHTMLCriticReviewsParser()
+        result = osparser.parse(officialsites_html_string)
+    """
+    kind = 'critic reviews'
+
+    extractors = [
+        Extractor(label='metascore',
+                path="//div[@class='metascore_wrap']/div/span",
+                attrs=Attribute(key='metascore',
+                                path=".//text()")),
+        Extractor(label='metacritic url',
+                path="//div[@class='article']/div[@class='see-more']/a",
+                attrs=Attribute(key='metacritic url',
+                                path="./@href")) ]
+    
 class DOMHTMLOfficialsitesParser(DOMParserBase):
    """Parser for the "official sites", "external reviews", "newsgroup
    reviews", "miscellaneous links", "sound clips", "video clips" and
@ -1471,6 +1513,14 @@ class DOMHTMLSeasonEpisodesParser(DOMParserBase):
        try: selected_season = int(selected_season)
        except: pass
        nd = {selected_season: {}}
+        if 'episode -1' in data:
+          counter = 1
+          for episode in data['episode -1']:
+            while 'episode %d' % counter in data:
+              counter += 1
+            k = 'episode %d' % counter
+            data[k] = [episode]
+          del data['episode -1']
        for episode_nr, episode in data.iteritems():
            if not (episode and episode[0] and
                    episode_nr.startswith('episode ')):
@ -1860,6 +1910,8 @@ _OBJECTS = {
    'releasedates_parser':  ((DOMHTMLReleaseinfoParser,), None),
    'ratings_parser':  ((DOMHTMLRatingsParser,), None),
    'officialsites_parser':  ((DOMHTMLOfficialsitesParser,), None),
+    'criticrev_parser':  ((DOMHTMLCriticReviewsParser,),
+                            {'kind': 'critic reviews'}),
    'externalrev_parser':  ((DOMHTMLOfficialsitesParser,),
                            {'kind': 'external reviews'}),
    'newsgrouprev_parser':  ((DOMHTMLOfficialsitesParser,),
--- a/lib/imdb/parser/http/personParser.py
+++ b/lib/imdb/parser/http/personParser.py
@ -8,7 +8,7 @@ E.g., for "Mel Gibson" the referred pages would be:
    biography:      http://akas.imdb.com/name/nm0000154/bio
    ...and so on...

-Copyright 2004-20101 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
               2008 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
@ -60,6 +60,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
        result = cparser.parse(categorized_html_string)
    """
    _containsObjects = True
+    _name_imdb_index = re.compile(r'\([IVXLCDM]+\)')

    _birth_attrs = [Attribute(key='birth date',
                        path='.//time[@itemprop="birthDate"]/@datetime'),
@ -100,6 +101,10 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
                            path=".//text()",
                            postprocess=lambda x: analyze_name(x,
                                                               canonical=1))),
+            Extractor(label='name_index',
+                        path="//h1[@class='header']/span[1]",
+                        attrs=Attribute(key='name_index',
+                            path="./text()")),

            Extractor(label='birth info',
                        path="//div[h4='Born:']",
@ -110,7 +115,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
                        attrs=_death_attrs),

            Extractor(label='headshot',
-                        path="//td[@id='img_primary']/a",
+                        path="//td[@id='img_primary']/div[@class='image']/a",
                        attrs=Attribute(key='headshot',
                            path="./img/@src")),

@ -152,6 +157,11 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
        for what in 'birth date', 'death date':
            if what in data and not data[what]:
                del data[what]
+        name_index = (data.get('name_index') or '').strip()
+        if name_index:
+            if self._name_imdb_index.match(name_index):
+                data['imdbIndex'] = name_index[1:-1]
+            del data['name_index']
        # XXX: the code below is for backwards compatibility
        # probably could be removed
        for key in data.keys():
@ -220,13 +230,13 @@ class DOMHTMLBioParser(DOMParserBase):
                        attrs=Attribute(key='headshot',
                            path="./img/@src")),
            Extractor(label='birth info',
-                        path="//div[h5='Date of Birth']",
+                        path="//table[@id='overviewTable']//td[text()='Date of Birth']/following-sibling::td[1]",
                        attrs=_birth_attrs),
            Extractor(label='death info',
-                        path="//div[h5='Date of Death']",
+                        path="//table[@id='overviewTable']//td[text()='Date of Death']/following-sibling::td[1]",
                        attrs=_death_attrs),
            Extractor(label='nick names',
-                        path="//div[h5='Nickname']",
+                        path="//table[@id='overviewTable']//td[text()='Nickenames']/following-sibling::td[1]",
                        attrs=Attribute(key='nick names',
                            path="./text()",
                            joiner='|',
@ -234,25 +244,25 @@ class DOMHTMLBioParser(DOMParserBase):
                                    '::(', 1) for n in x.split('|')
                                    if n.strip()])),
            Extractor(label='birth name',
-                        path="//div[h5='Birth Name']",
+                        path="//table[@id='overviewTable']//td[text()='Birth Name']/following-sibling::td[1]",
                        attrs=Attribute(key='birth name',
                            path="./text()",
                            postprocess=lambda x: canonicalName(x.strip()))),
            Extractor(label='height',
-                        path="//div[h5='Height']",
+                path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
                        attrs=Attribute(key='height',
                            path="./text()",
                            postprocess=lambda x: x.strip())),
            Extractor(label='mini biography',
-                        path="//div[h5='Mini Biography']",
+                        path="//a[@name='mini_bio']/following-sibling::div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
                        attrs=Attribute(key='mini biography',
                            multi=True,
                            path={
-                                'bio': "./p//text()",
-                                'by': "./b/following-sibling::a/text()"
+                                'bio': ".//text()",
+                                'by': ".//a[@name='ba']//text()"
                                },
                            postprocess=lambda x: "%s::%s" % \
-                                (x.get('bio').strip(),
+                                ((x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
                                (x.get('by') or u'').strip() or u'Anonymous'))),
            Extractor(label='spouse',
                        path="//div[h5='Spouse']/table/tr",
--- a/lib/imdb/parser/http/searchCharacterParser.py
+++ b/lib/imdb/parser/http/searchCharacterParser.py
@ -5,9 +5,9 @@ This module provides the HTMLSearchCharacterParser class (and the
 search_character_parser instance), used to parse the results of a search
 for a given character.
 E.g., when searching for the name "Jesse James", the parsed page would be:
-    http://akas.imdb.com/find?s=Characters;mx=20;q=Jesse+James
+    http://akas.imdb.com/find?s=ch;mx=20;q=Jesse+James

-Copyright 2007-2009 Davide Alberani <da@erlug.linux.it>
+Copyright 2007-2012 Davide Alberani <da@erlug.linux.it>
               2008 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
@ -42,7 +42,7 @@ class DOMBasicCharacterParser(DOMBasicMovieParser):

 class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
    _BaseParser = DOMBasicCharacterParser
-    _notDirectHitTitle = '<title>imdb search'
+    _notDirectHitTitle = '<title>find - imdb'
    _titleBuilder = lambda self, x: build_name(x, canonical=False)
    _linkPrefix = '/character/ch'

@ -57,7 +57,7 @@ class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
                            {'name': x.get('name')}
                        ))]
    extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, " \
+                            path="//td[@class='result_text']/a[starts-with(@href, " \
                                    "'/character/ch')]/..",
                            attrs=_attrs)]

--- a/lib/imdb/parser/http/searchCompanyParser.py
+++ b/lib/imdb/parser/http/searchCompanyParser.py
@ -7,7 +7,7 @@ for a given company.
 E.g., when searching for the name "Columbia Pictures", the parsed page would be:
    http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures

-Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
+Copyright 2008-2012 Davide Alberani <da@erlug.linux.it>
          2008 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
@ -42,7 +42,7 @@ class DOMBasicCompanyParser(DOMBasicMovieParser):

 class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
    _BaseParser = DOMBasicCompanyParser
-    _notDirectHitTitle = '<title>imdb company'
+    _notDirectHitTitle = '<title>find - imdb'
    _titleBuilder = lambda self, x: build_company_name(x)
    _linkPrefix = '/company/co'

@ -59,7 +59,7 @@ class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
                                                or u''), stripNotes=True)
                        ))]
    extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, " \
+                            path="//td[@class='result_text']/a[starts-with(@href, " \
                                    "'/company/co')]/..",
                            attrs=_attrs)]

--- a/lib/imdb/parser/http/searchMovieParser.py
+++ b/lib/imdb/parser/http/searchMovieParser.py
@ -8,7 +8,7 @@ E.g., for when searching for the title "the passion", the parsed
 page would be:
    http://akas.imdb.com/find?q=the+passion&tt=on&mx=20

-Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
               2008 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
@ -77,7 +77,7 @@ class DOMBasicMovieParser(DOMParserBase):
 def custom_analyze_title(title):
    """Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
    # XXX: very crappy. :-(
-    nt = title.split('    ')[0]
+    nt = title.split(' aka ')[0]
    if nt:
        title = nt
    if not title:
@ -92,7 +92,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
    "new search system" is used, for movies."""

    _BaseParser = DOMBasicMovieParser
-    _notDirectHitTitle = '<title>imdb title'
+    _notDirectHitTitle = '<title>find - imdb</title>'
    _titleBuilder = lambda self, x: build_title(x)
    _linkPrefix = '/title/tt'

@ -101,8 +101,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
                        path={
                            'link': "./a[1]/@href",
                            'info': ".//text()",
-                            #'akas': ".//div[@class='_imdbpyAKA']//text()"
-                            'akas': ".//p[@class='find-aka']//text()"
+                            'akas': "./i//text()"
                            },
                        postprocess=lambda x: (
                            analyze_imdbid(x.get('link') or u''),
@ -110,7 +109,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
                            x.get('akas')
                        ))]
    extractors = [Extractor(label='search',
-                        path="//td[3]/a[starts-with(@href, '/title/tt')]/..",
+                        path="//td[@class='result_text']",
                        attrs=_attrs)]
    def _init(self):
        self.url = u''
@ -119,14 +118,11 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
        self.url = u''

    def preprocess_string(self, html_string):
-        if self._notDirectHitTitle in html_string[:1024].lower():
+        if self._notDirectHitTitle in html_string[:10240].lower():
            if self._linkPrefix == '/title/tt':
                # Only for movies.
+                # XXX (HTU): does this still apply?
                html_string = html_string.replace('(TV mini-series)', '(mini)')
-                html_string = html_string.replace('<p class="find-aka">',
-                        '<p class="find-aka">::')
-                #html_string = _reAKAStitles.sub(
-                #        r'<div class="_imdbpyAKA">\1::</div>\2', html_string)
            return html_string
        # Direct hit!
        dbme = self._BaseParser(useModule=self._useModule)
@ -141,7 +137,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
        title = self._titleBuilder(res[0][1])
        if not (link and title): return u''
        link = link.replace('http://pro.imdb.com', '')
-        new_html = '<td></td><td></td><td><a href="%s">%s</a></td>' % (link,
+        new_html = '<td class="result_text"><a href="%s">%s</a></td>' % (link,
                                                                    title)
        return new_html

@ -161,11 +157,14 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
                if not datum[0] and datum[1]:
                    continue
                if datum[2] is not None:
-                    akas = filter(None, datum[2].split('::'))
+                    #akas = filter(None, datum[2].split('::'))
                    if self._linkPrefix == '/title/tt':
-                        akas = [a.replace('" - ', '::').rstrip() for a in akas]
-                        akas = [a.replace('aka "', '', 1).replace('aka  "',
-                                '', 1).lstrip() for a in akas]
+                        # XXX (HTU): couldn't find a result with multiple akas
+                        aka = datum[2]
+                        akas = [aka[1:-1]]      # remove the quotes
+                        #akas = [a.replace('" - ', '::').rstrip() for a in akas]
+                        #akas = [a.replace('aka "', '', 1).replace('aka  "',
+                                #'', 1).lstrip() for a in akas]
                    datum[1]['akas'] = akas
                    data['data'][idx] = (datum[0], datum[1])
                else:
--- a/lib/imdb/parser/http/searchPersonParser.py
+++ b/lib/imdb/parser/http/searchPersonParser.py
@ -7,7 +7,7 @@ for a given person.
 E.g., when searching for the name "Mel Gibson", the parsed page would be:
    http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20

-Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
               2008 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
@ -55,7 +55,7 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
    """Parse the html page that the IMDb web server shows when the
    "new search system" is used, for persons."""
    _BaseParser = DOMBasicPersonParser
-    _notDirectHitTitle = '<title>imdb name'
+    _notDirectHitTitle = '<title>find - imdb'
    _titleBuilder = lambda self, x: build_name(x, canonical=True)
    _linkPrefix = '/name/nm'

@ -74,11 +74,11 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
                                         canonical=1), x.get('akas')
                        ))]
    extractors = [Extractor(label='search',
-                            path="//td[3]/a[starts-with(@href, '/name/nm')]/..",
+                            path="//td[@class='result_text']/a[starts-with(@href, '/name/nm')]/..",
                            attrs=_attrs)]

    def preprocess_string(self, html_string):
-        if self._notDirectHitTitle in html_string[:1024].lower():
+        if self._notDirectHitTitle in html_string[:10240].lower():
            html_string = _reAKASp.sub(
                                    r'\1<div class="_imdbpyAKA">\2::</div>\3',
                                    html_string)
--- a/lib/imdb/parser/http/utils.py
+++ b/lib/imdb/parser/http/utils.py
@ -340,7 +340,7 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
        title = title[:nidx].rstrip()
    if year:
        year = year.strip()
-        if title[-1] == ')':
+        if title[-1:] == ')':
            fpIdx = title.rfind('(')
            if fpIdx != -1:
                if notes: notes = '%s %s' % (title[fpIdx:], notes)
--- a/lib/imdb/parser/mobile/init.py
+++ b/lib/imdb/parser/mobile/init.py
@ -6,7 +6,7 @@ IMDb's data for mobile systems.
 the imdb.IMDb function will return an instance of this class when
 called with the 'accessSystem' argument set to "mobile".

-Copyright 2005-2011 Davide Alberani <da@erlug.linux.it>
+Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@ -193,7 +193,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                                    title)
            return res
        tl = title[0].lower()
-        if not tl.startswith('imdb title'):
+        if not tl.startswith('find - imdb'):
            # a direct hit!
            title = _unHtml(title[0])
            mid = None
@ -211,7 +211,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
            # XXX: this results*3 prevents some recursion errors, but...
            #      it's not exactly understandable (i.e.: why 'results' is
            #      not enough to get all the results?)
-            lis = _findBetween(cont, 'td valign="top">', '</td>',
+            lis = _findBetween(cont, 'td class="result_text">', '</td>',
                                maxRes=results*3)
            for li in lis:
                akas = re_makas.findall(li)
@ -492,7 +492,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
            self._mobile_logger.warn('no title tag searching for name %s', name)
            return res
        nl = name[0].lower()
-        if not nl.startswith('imdb name'):
+        if not nl.startswith('find - imdb'):
            # a direct hit!
            name = _unHtml(name[0])
            name = name.replace('- Filmography by type' , '').strip()
@ -506,7 +506,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                return res
            res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
        else:
-            lis = _findBetween(cont, 'td valign="top">', '</td>',
+            lis = _findBetween(cont, 'td class="result_text">', '</td>',
                                maxRes=results*3)
            for li in lis:
                akas = _findBetween(li, '<em>"', '"</em>')
@ -771,7 +771,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
        return {'data': d}

    def _search_character(self, name, results):
-        cont = subXMLRefs(self._get_search_content('char', name, results))
+        cont = subXMLRefs(self._get_search_content('ch', name, results))
        name = _findBetween(cont, '<title>', '</title>', maxRes=1)
        res = []
        if not name:
@ -779,8 +779,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                                    name)
            return res
        nl = name[0].lower()
-        if not (nl.startswith('imdb search') or nl.startswith('imdb  search') \
-                or nl.startswith('imdb character')):
+        if not nl.startswith('find - imdb'):
            # a direct hit!
            name = _unHtml(name[0]).replace('(Character)', '').strip()
            pid = None
@ -793,23 +792,18 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
                return res
            res[:] = [(str(pid[0]), analyze_name(name))]
        else:
-            sects = _findBetween(cont, '<b>Popular Characters</b>', '</table>',
-                                maxRes=results*3)
-            sects += _findBetween(cont, '<b>Characters', '</table>',
-                                maxRes=results*3)
-            for sect in sects:
-                lis = _findBetween(sect, '<a href="/character/',
-                                    ['<small', '</td>', '<br'])
-                for li in lis:
-                    li = '<%s' % li
-                    pid = re_imdbID.findall(li)
-                    pname = _unHtml(li)
-                    if not (pid and pname):
-                        self._mobile_logger.debug('no name/characterID' \
-                                                ' parsing %s searching for' \
-                                                ' character %s', li, name)
-                        continue
-                    res.append((str(pid[0]), analyze_name(pname)))
+            lis = _findBetween(cont, '<td class="result_text"',
+                                ['<small', '</td>', '<br'])
+            for li in lis:
+                li = '<%s' % li
+                pid = re_imdbID.findall(li)
+                pname = _unHtml(li)
+                if not (pid and pname):
+                    self._mobile_logger.debug('no name/characterID' \
+                                            ' parsing %s searching for' \
+                                            ' character %s', li, name)
+                    continue
+                res.append((str(pid[0]), analyze_name(pname)))
        return res

    def get_character_main(self, characterID):
--- a/lib/imdb/parser/sql/init.py
+++ b/lib/imdb/parser/sql/init.py
@ -7,7 +7,7 @@ the SQLObject _AND_ SQLAlchemy Object Relational Managers is available.
 the imdb.IMDb function will return an instance of this class when
 called with the 'accessSystem' argument set to "sql", "database" or "db".

-Copyright 2005-2010 Davide Alberani <da@erlug.linux.it>
+Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@ -452,7 +452,12 @@ def get_movie_data(movieID, kindDict, fromAka=0, _table=None):
    else:
        if not fromAka: Table = Title
        else: Table = AkaTitle
-    m = Table.get(movieID)
+    try:
+        m = Table.get(movieID)
+    except Exception, e:
+        _aux_logger.warn('Unable to fetch information for movieID %s: %s', movieID, e)
+        mdict = {}
+        return mdict
    mdict = {'title': m.title, 'kind': kindDict[m.kindID],
            'year': m.productionYear, 'imdbIndex': m.imdbIndex,
            'season': m.seasonNr, 'episode': m.episodeNr}
@ -825,14 +830,14 @@ class IMDbSqlAccessSystem(IMDbBase):
        imdbID = movie.imdbID
        if imdbID is not None: return '%07d' % imdbID
        m_dict = get_movie_data(movie.id, self._kind)
-        titline = build_title(m_dict, ptdf=1)
-        imdbID = self.title2imdbID(titline)
+        titline = build_title(m_dict, ptdf=0)
+        imdbID = self.title2imdbID(titline, m_dict['kind'])
        # If the imdbID was retrieved from the web and was not in the
        # database, update the database (ignoring errors, because it's
        # possibile that the current user has not update privileges).
        # There're times when I think I'm a genius; this one of
        # those times... <g>
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
            try: movie.imdbID = int(imdbID)
            except: pass
        return imdbID
@ -847,9 +852,9 @@ class IMDbSqlAccessSystem(IMDbBase):
        imdbID = person.imdbID
        if imdbID is not None: return '%07d' % imdbID
        n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
-        namline = build_name(n_dict, canonical=1)
+        namline = build_name(n_dict, canonical=False)
        imdbID = self.name2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
            try: person.imdbID = int(imdbID)
            except: pass
        return imdbID
@ -864,9 +869,9 @@ class IMDbSqlAccessSystem(IMDbBase):
        imdbID = character.imdbID
        if imdbID is not None: return '%07d' % imdbID
        n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
-        namline = build_name(n_dict, canonical=1)
+        namline = build_name(n_dict, canonical=False)
        imdbID = self.character2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
            try: character.imdbID = int(imdbID)
            except: pass
        return imdbID
@ -883,7 +888,7 @@ class IMDbSqlAccessSystem(IMDbBase):
        n_dict = {'name': company.name, 'country': company.countryCode}
        namline = build_company_name(n_dict)
        imdbID = self.company2imdbID(namline)
-        if imdbID is not None:
+        if imdbID is not None and not isinstance(imdbID, list):
            try: company.imdbID = int(imdbID)
            except: pass
        return imdbID
@ -1116,8 +1121,9 @@ class IMDbSqlAccessSystem(IMDbBase):
        if mlinks:
            for ml in mlinks:
                lmovieData = get_movie_data(ml[0], self._kind)
-                m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
-                ml[0] = m
+                if lmovieData:
+                    m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
+                    ml[0] = m
            res['connections'] = {}
            mlinks[:] = _groupListBy(mlinks, 1)
            for group in mlinks:
--- a/lib/imdb/parser/sql/alchemyadapter.py
+++ b/lib/imdb/parser/sql/alchemyadapter.py
@ -466,6 +466,7 @@ class _AlchemyConnection(object):

 def setConnection(uri, tables, encoding='utf8', debug=False):
    """Set connection for every table."""
+    params = {'encoding': encoding}
    # FIXME: why on earth MySQL requires an additional parameter,
    #        is well beyond my understanding...
    if uri.startswith('mysql'):
@ -474,7 +475,11 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
        else:
            uri += '?'
        uri += 'charset=%s' % encoding
-    params = {'encoding': encoding}
+        
+        # On some server configurations, we will need to explicitly enable
+        # loading data from local files
+        params['local_infile'] = 1
+   
    if debug:
        params['echo'] = True
    if uri.startswith('ibm_db'):
--- a/lib/imdb/parser/sql/cutils.so
+++ b/lib/imdb/parser/sql/cutils.so
--- a/lib/imdb/parser/sql/objectadapter.py
+++ b/lib/imdb/parser/sql/objectadapter.py
@ -182,6 +182,10 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
        kw['use_unicode'] = 1
        #kw['sqlobject_encoding'] = encoding
        kw['charset'] = encoding
+
+        # On some server configurations, we will need to explicitly enable
+        # loading data from local files
+        kw['local_infile'] = 1
    conn = connectionForURI(uri, **kw)
    conn.debug = debug
    # XXX: doesn't work and a work-around was put in imdbpy2sql.py;
--- a/lib/imdb/utils.py
+++ b/lib/imdb/utils.py
@ -3,7 +3,7 @@ utils module (imdb package).

 This module provides basic utilities for the imdb package.

-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
               2009 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
@ -189,10 +189,9 @@ _unicodeArticles = linguistics.toUnicode(_articles)
 articlesDicts = linguistics.articlesDictsForLang(None)
 spArticles = linguistics.spArticlesForLang(None)

-def canonicalTitle(title, lang=None):
+def canonicalTitle(title, lang=None, imdbIndex=None):
    """Return the title in the canonic format 'Movie Title, The';
-    beware that it doesn't handle long imdb titles, but only the
-    title portion, without year[/imdbIndex] or special markup.
+    beware that it doesn't handle long imdb titles.
    The 'lang' argument can be used to specify the language of the title.
    """
    isUnicode = isinstance(title, unicode)
@ -203,15 +202,19 @@ def canonicalTitle(title, lang=None):
    except IndexError:
        pass
    if isUnicode:
-        _format = u'%s, %s'
+        _format = u'%s%s, %s'
    else:
-        _format = '%s, %s'
+        _format = '%s%s, %s'
    ltitle = title.lower()
+    if imdbIndex:
+        imdbIndex = ' (%s)' % imdbIndex
+    else:
+        imdbIndex = ''
    spArticles = linguistics.spArticlesForLang(lang)
    for article in spArticles[isUnicode]:
        if ltitle.startswith(article):
            lart = len(article)
-            title = _format % (title[lart:], title[:lart])
+            title = _format % (title[lart:], imdbIndex, title[:lart])
            if article[-1] == ' ':
                title = title[:-1]
            break
@ -383,18 +386,42 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
    if title.endswith('(TV)'):
        kind = u'tv movie'
        title = title[:-4].rstrip()
+    elif title.endswith('(TV Movie)'):
+        kind = u'tv movie'
+        title = title[:-10].rstrip()
    elif title.endswith('(V)'):
        kind = u'video movie'
        title = title[:-3].rstrip()
-    elif title.endswith('(video)'):
+    elif title.lower().endswith('(video)'):
        kind = u'video movie'
        title = title[:-7].rstrip()
+    elif title.endswith('(TV Short)'):
+        kind = u'tv short'
+        title = title[:-10].rstrip()
+    elif title.endswith('(TV Mini-Series)'):
+        kind = u'tv mini series'
+        title = title[:-16].rstrip()
    elif title.endswith('(mini)'):
        kind = u'tv mini series'
        title = title[:-6].rstrip()
    elif title.endswith('(VG)'):
        kind = u'video game'
        title = title[:-4].rstrip()
+    elif title.endswith('(Video Game)'):
+        kind = u'video game'
+        title = title[:-12].rstrip()
+    elif title.endswith('(TV Series)'):
+        epindex = title.find('(TV Episode) - ')
+        if epindex >= 0:
+            # It's an episode of a series.
+            kind = u'episode'
+            series_info = analyze_title(title[epindex + 15:])
+            result['episode of'] = series_info.get('title')
+            result['series year'] = series_info.get('year')
+            title = title[:epindex]
+        else:
+            kind = u'tv series'
+            title = title[:-11].rstrip()
    # Search for the year and the optional imdbIndex (a roman number).
    yi = re_year_index.findall(title)
    if not yi:
@ -430,9 +457,6 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
        if not kind:
            kind = u'tv series'
        title = title[1:-1].strip()
-    elif title.endswith('(TV series)'):
-        kind = u'tv series'
-        title = title[:-11].rstrip()
    if not title:
        raise IMDbParserError('invalid title: "%s"' % original_t)
    if canonical is not None:
@ -489,7 +513,7 @@ def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''):

 def build_title(title_dict, canonical=None, canonicalSeries=None,
                canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
-                _emptyString=u''):
+                _emptyString=u'', appendKind=True):
    """Given a dictionary that represents a "long" IMDb title,
    return a string.

@ -511,6 +535,11 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
        doYear = 0
        if ptdf:
            doYear = 1
+        # XXX: for results coming from the new search page.
+        if not isinstance(episode_of, (dict, _Container)):
+            episode_of = {'title': episode_of, 'kind': 'tv series'}
+            if 'series year' in title_dict:
+                episode_of['year'] = title_dict['series year']
        pre_title = build_title(episode_of, canonical=canonicalSeries,
                                ptdf=0, _doYear=doYear,
                                _emptyString=_emptyString)
@ -545,12 +574,14 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
                    episode_title += '.%s' % episode
                episode_title += ')'
            episode_title = '{%s}' % episode_title
-        return '%s %s' % (pre_title, episode_title)
+        return _emptyString + '%s %s' % (_emptyString + pre_title,
+                            _emptyString + episode_title)
    title = title_dict.get('title', '')
+    imdbIndex = title_dict.get('imdbIndex', '')
    if not title: return _emptyString
    if canonical is not None:
        if canonical:
-            title = canonicalTitle(title, lang=lang)
+            title = canonicalTitle(title, lang=lang, imdbIndex=imdbIndex)
        else:
            title = normalizeTitle(title, lang=lang)
    if pre_title:
@ -558,15 +589,20 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
    if kind in (u'tv series', u'tv mini series'):
        title = '"%s"' % title
    if _doYear:
-        imdbIndex = title_dict.get('imdbIndex')
-        year = title_dict.get('year') or u'????'
+        year = title_dict.get('year') or '????'
        if isinstance(_emptyString, str):
            year = str(year)
-        title += ' (%s' % year
-        if imdbIndex:
-            title += '/%s' % imdbIndex
-        title += ')'
-    if kind:
+        imdbIndex = title_dict.get('imdbIndex')
+        if not ptdf:
+            if imdbIndex and (canonical is None or canonical):
+                title += ' (%s)' % imdbIndex
+            title += ' (%s)' % year
+        else:
+            title += ' (%s' % year
+            if imdbIndex and (canonical is None or canonical):
+                title += '/%s' % imdbIndex
+            title += ')'
+    if appendKind and kind:
        if kind == 'tv movie':
            title += ' (TV)'
        elif kind == 'video movie':
--- a/lib/tvdb_api/tvdb_api.py
+++ b/lib/tvdb_api/tvdb_api.py
@ -11,6 +11,7 @@ __author__ = "dbr/Ben"
 __version__ = "1.9"

 import os
+import re
 import time
 import getpass
 import StringIO
@ -18,8 +19,10 @@ import tempfile
 import warnings
 import logging
 import zipfile
+import datetime as dt
 import requests
 import cachecontrol
+import xmltodict

 try:
    import xml.etree.cElementTree as ElementTree
@ -31,6 +34,7 @@ try:
 except ImportError:
    gzip = None

+from lib.dateutil.parser import parse
 from cachecontrol import caches

 from tvdb_ui import BaseUI, ConsoleUI
@ -560,44 +564,71 @@ class Tvdb:
        except requests.Timeout, e:
            raise tvdb_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))

-        if 'application/zip' in resp.headers.get("Content-Type", '') and resp.ok:
-            try:
-                # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
-                log().debug("We recived a zip file unpacking now ...")
-                zipdata = StringIO.StringIO()
-                zipdata.write(resp.content)
-                myzipfile = zipfile.ZipFile(zipdata)
-                return myzipfile.read('%s.xml' % language)
-            except zipfile.BadZipfile:
-                raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
+        def process(path, key, value):
+            key = key.lower()

-        return resp.content if resp.ok else None
+            # clean up value and do type changes
+            if value:
+                try:
+                    # convert to integer if needed
+                    if value.isdigit():
+                        value = int(value)
+                except:
+                    pass
+
+                if key in ['banner', 'fanart', 'poster']:
+                    value = self.config['url_artworkPrefix'] % (value)
+                else:
+                    value = self._cleanData(value)
+
+                try:
+                    if key == 'firstaired' and value in "0000-00-00":
+                        new_value = str(dt.date.fromordinal(1))
+                        new_value = re.sub("([-]0{2}){1,}", "", new_value)
+                        fixDate = parse(new_value, fuzzy=True).date()
+                        value = fixDate.strftime("%Y-%m-%d")
+                    elif key == 'firstaired':
+                        value = parse(value, fuzzy=True).date()
+                        value = value.strftime("%Y-%m-%d")
+                except:
+                    pass
+
+            value = self._cleanData(value)
+            return (key, value)
+
+        if resp.ok:
+            if 'application/zip' in resp.headers.get("Content-Type", ''):
+                try:
+                    # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
+                    log().debug("We recived a zip file unpacking now ...")
+                    zipdata = StringIO.StringIO()
+                    zipdata.write(resp.content)
+                    myzipfile = zipfile.ZipFile(zipdata)
+                    return xmltodict.parse(myzipfile.read('%s.xml' % language), postprocessor=process)
+                except zipfile.BadZipfile:
+                    raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
+            else:
+                return xmltodict.parse(resp.text.strip(), postprocessor=process)

    def _getetsrc(self, url, params=None, language=None):
        """Loads a URL using caching, returns an ElementTree of the source
        """
-        src = self._loadUrl(url, params=params, language=language)
        try:
-            # TVDB doesn't sanitize \r (CR) from user input in some fields,
-            # remove it to avoid errors. Change from SickBeard, from will14m
-            return ElementTree.fromstring(src.rstrip("\r")) if src else None
-        except SyntaxError:
            src = self._loadUrl(url, params=params, language=language)
-            try:
-                return ElementTree.fromstring(src.rstrip("\r")) if src else None
-            except SyntaxError, exceptionmsg:
-                errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % (
-                    exceptionmsg
+            src = [src[item] for item in src][0]
+        except:
+            errormsg = "There was an error with the XML retrieved from thetvdb.com:"
+
+            if self.config['cache_enabled']:
+                errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
+                    self.config['cache_location']
                )

-                if self.config['cache_enabled']:
-                    errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
-                        self.config['cache_location']
-                    )
+            errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on"
+            errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
+            raise tvdb_error(errormsg)

-                errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on"
-                errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
-                raise tvdb_error(errormsg)
+        return src

    def _setItem(self, sid, seas, ep, attrib, value):
        """Creates a new episode, creating Show(), Season() and
@ -649,9 +680,8 @@ class Tvdb:
        log().debug("Searching for show %s" % series)
        self.config['params_getSeries']['seriesname'] = series
        seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
-        allSeries = list(dict((s.tag.lower(), s.text) for s in x.getchildren()) for x in seriesEt)

-        return allSeries
+        return [seriesEt[item] for item in seriesEt][0]

    def _getSeries(self, series):
        """This searches TheTVDB.com for the series name,
@ -798,24 +828,13 @@ class Tvdb:
            self.config['url_seriesInfo'] % (sid, getShowInLanguage)
        )

-        if seriesInfoEt is None: return False
-        for curInfo in seriesInfoEt.findall("Series")[0]:
-            tag = curInfo.tag.lower()
-            value = curInfo.text
+        # check and make sure we have data to process and that it contains a series name
+        if seriesInfoEt is None or 'seriesname' not in seriesInfoEt['series']:
+            return False

-            if tag == 'seriesname' and value is None:
-                return False
+        for k, v in seriesInfoEt['series'].items():
+            self._setShowData(sid, k, v)

-            if value is not None:
-                if tag == 'id':
-                    value = int(value)
-
-                if tag in ['banner', 'fanart', 'poster']:
-                    value = self.config['url_artworkPrefix'] % (value)
-                else:
-                    value = self._cleanData(value)
-
-            self._setShowData(sid, tag, value)
        if seriesSearch:
            return True

@ -837,63 +856,40 @@ class Tvdb:

        epsEt = self._getetsrc(url, language=language)

-        for cur_ep in epsEt.findall("Episode"):
-
+        for cur_ep in epsEt["episode"]:
            if self.config['dvdorder']:
                log().debug('Using DVD ordering.')
-                use_dvd = cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None
+                use_dvd = cur_ep['dvd_season'] != None and cur_ep['dvd_episodenumber'] != None
            else:
                use_dvd = False

            if use_dvd:
-                elem_seasnum, elem_epno = cur_ep.find('DVD_season'), cur_ep.find('DVD_episodenumber')
+                seasnum, epno = cur_ep['dvd_season'], cur_ep['dvd_episodenumber']
            else:
-                elem_seasnum, elem_epno = cur_ep.find('SeasonNumber'), cur_ep.find('EpisodeNumber')
-
-            if elem_seasnum is None or elem_epno is None:
+                seasnum, epno = cur_ep['seasonnumber'], cur_ep['episodenumber']

+            if seasnum is None or epno is None:
                log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
-                    elem_seasnum, elem_epno))
-                log().debug(
-                    " ".join(
-                        "%r is %r" % (child.tag, child.text) for child in cur_ep.getchildren()))
-                # TODO: Should this happen?
+                    seasnum, epno))
                continue # Skip to next episode

-
            # float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
-            seas_no = int(float(elem_seasnum.text))
-            ep_no = int(float(elem_epno.text))
+            seas_no = int(float(seasnum))
+            ep_no = int(float(epno))

-            useDVD = False
+            for k,v in cur_ep.items():
+                k = k.lower()

-            if (self.config['dvdorder']):
-                log().debug('DVD Order?  Yes')
-                useDVD = (cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None)
-            else:
-                log().debug('DVD Order? No')
+                if v is not None:
+                    if k == 'id':
+                        v = int(v)

-            if (useDVD):
-                log().debug('Use DVD Order? Yes')
-                seas_no = int(cur_ep.find('DVD_season').text)
-                ep_no = int(float(cur_ep.find('DVD_episodenumber').text))
-            else:
-                log().debug('Use DVD Order? No')
-                seas_no = int(cur_ep.find('SeasonNumber').text)
-                ep_no = int(cur_ep.find('EpisodeNumber').text)
-
-            for cur_item in cur_ep.getchildren():
-                tag = cur_item.tag.lower()
-                value = cur_item.text
-                if value is not None:
-                    if tag == 'id':
-                        value = int(value)
-
-                    if tag == 'filename':
-                        value = self.config['url_artworkPrefix'] % (value)
+                    if k == 'filename':
+                        v = self.config['url_artworkPrefix'] % (v)
                    else:
-                        value = self._cleanData(value)
-                self._setItem(sid, seas_no, ep_no, tag, value)
+                        v = self._cleanData(v)
+
+                    self._setItem(sid, seas_no, ep_no, k, v)

        return True

--- a/lib/tvrage_api/tvrage_api.py
+++ b/lib/tvrage_api/tvrage_api.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+# !/usr/bin/env python2
 #encoding:utf-8
 #author:echel0n
 #project:tvrage_api
@ -24,6 +24,7 @@ import logging
 import datetime as dt
 import requests
 import cachecontrol
+import xmltodict

 try:
    import xml.etree.cElementTree as ElementTree
@ -35,11 +36,13 @@ from cachecontrol import caches

 from tvrage_ui import BaseUI
 from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
-    tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
+                               tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
+

 def log():
    return logging.getLogger("tvrage_api")

+
 def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

@ -83,6 +86,7 @@ def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):

    return deco_retry

+
 class ShowContainer(dict):
    """Simple dict that holds a series of Show instances
    """
@ -105,13 +109,14 @@ class ShowContainer(dict):

            _lastgc = time.time()
            del tbd
-                    
+
        super(ShowContainer, self).__setitem__(key, value)


 class Show(dict):
    """Holds a dict of seasons, and show data.
    """
+
    def __init__(self):
        dict.__init__(self)
        self.data = {}
@ -157,7 +162,7 @@ class Show(dict):
            raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
        return ret

-    def search(self, term = None, key = None):
+    def search(self, term=None, key=None):
        """
        Search all episodes in show. Can search all data, or a specific key (for
        example, episodename)
@ -173,7 +178,7 @@ class Show(dict):
        """
        results = []
        for cur_season in self.values():
-            searchresult = cur_season.search(term = term, key = key)
+            searchresult = cur_season.search(term=term, key=key)
            if len(searchresult) != 0:
                results.extend(searchresult)

@ -181,7 +186,7 @@ class Show(dict):


 class Season(dict):
-    def __init__(self, show = None):
+    def __init__(self, show=None):
        """The show attribute points to the parent show
        """
        self.show = show
@ -202,13 +207,13 @@ class Season(dict):
        else:
            return dict.__getitem__(self, episode_number)

-    def search(self, term = None, key = None):
+    def search(self, term=None, key=None):
        """Search all episodes in season, returns a list of matching Episode
        instances.
        """
        results = []
        for ep in self.values():
-            searchresult = ep.search(term = term, key = key)
+            searchresult = ep.search(term=term, key=key)
            if searchresult is not None:
                results.append(
                    searchresult
@ -217,7 +222,7 @@ class Season(dict):


 class Episode(dict):
-    def __init__(self, season = None):
+    def __init__(self, season=None):
        """The season attribute points to the parent season
        """
        self.season = season
@ -242,7 +247,7 @@ class Episode(dict):
        except KeyError:
            raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))

-    def search(self, term = None, key = None):
+    def search(self, term=None, key=None):
        """Search episode data for term, if it matches, return the Episode (self).
        The key parameter can be used to limit the search to a specific element,
        for example, episodename.
@ -258,25 +263,27 @@ class Episode(dict):
            if key is not None and cur_key != key:
                # Do not search this key
                continue
-            if cur_value.find( unicode(term).lower() ) > -1:
+            if cur_value.find(unicode(term).lower()) > -1:
                return self

+
 class TVRage:
    """Create easy-to-use interface to name of season/episode name"""
+
    def __init__(self,
-                interactive = False,
-                select_first = False,
-                debug = False,
-                cache = True,
-                banners = False,
-                actors = False,
-                custom_ui = None,
-                language = None,
-                search_all_languages = False,
-                apikey = None,
-                forceConnect=False,
-                useZip=False,
-                dvdorder=False):
+                 interactive=False,
+                 select_first=False,
+                 debug=False,
+                 cache=True,
+                 banners=False,
+                 actors=False,
+                 custom_ui=None,
+                 language=None,
+                 search_all_languages=False,
+                 apikey=None,
+                 forceConnect=False,
+                 useZip=False,
+                 dvdorder=False):

        """
        cache (True/False/str/unicode/urllib2 opener):
@ -294,18 +301,18 @@ class TVRage:
            return an exception immediately.
        """

-        self.shows = ShowContainer() # Holds all Show classes
-        self.corrections = {} # Holds show-name to show_id mapping
-        self.sess = requests.session() # HTTP Session
+        self.shows = ShowContainer()  # Holds all Show classes
+        self.corrections = {}  # Holds show-name to show_id mapping
+        self.sess = requests.session()  # HTTP Session

        self.config = {}

        if apikey is not None:
            self.config['apikey'] = apikey
        else:
-            self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt" # tvdb_api's API key
+            self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt"  # tvdb_api's API key

-        self.config['debug_enabled'] = debug # show debugging messages
+        self.config['debug_enabled'] = debug  # show debugging messages

        self.config['custom_ui'] = custom_ui

@ -322,8 +329,8 @@ class TVRage:

        if self.config['debug_enabled']:
            warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. "
-            "To enable debug messages, use the following code before importing: "
-            "import logging; logging.basicConfig(level=logging.DEBUG)")
+                          "To enable debug messages, use the following code before importing: "
+                          "import logging; logging.basicConfig(level=logging.DEBUG)")
            logging.basicConfig(level=logging.DEBUG)


@ -331,8 +338,8 @@ class TVRage:
        # Hard-coded here as it is realtively static, and saves another HTTP request, as
        # recommended on http://tvrage.com/wiki/index.php/API:languages.xml
        self.config['valid_languages'] = [
-            "da", "fi", "nl", "de", "it", "es", "fr","pl", "hu","el","tr",
-            "ru","he","ja","pt","zh","cs","sl", "hr","ko","en","sv","no"
+            "da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
+            "ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv", "no"
        ]

        # tvrage.com should be based around numeric language codes,
@ -340,9 +347,9 @@ class TVRage:
        # requires the language ID, thus this mapping is required (mainly
        # for usage in tvrage_ui - internally tvrage_api will use the language abbreviations)
        self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
-        'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
-        'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
-        'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}
+                                         'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
+                                         'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
+                                         'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}

        if language is None:
            self.config['language'] = 'en'
@ -390,9 +397,9 @@ class TVRage:

            # get response from TVRage
            if self.config['cache_enabled']:
-                resp = self.sess.get(url, cache_auto=True, params=params)
+                resp = self.sess.get(url.strip(), cache_auto=True, params=params)
            else:
-                resp = requests.get(url, params=params)
+                resp = requests.get(url.strip(), params=params)

        except requests.HTTPError, e:
            raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
@ -403,81 +410,84 @@ class TVRage:
        except requests.Timeout, e:
            raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))

-        return resp.content if resp.ok else None
+        def remap_keys(path, key, value):
+            name_map = {
+                'showid': 'id',
+                'showname': 'seriesname',
+                'name': 'seriesname',
+                'summary': 'overview',
+                'started': 'firstaired',
+                'genres': 'genre',
+                'airtime': 'airs_time',
+                'airday': 'airs_dayofweek',
+                'image': 'fanart',
+                'epnum': 'absolute_number',
+                'title': 'episodename',
+                'airdate': 'firstaired',
+                'screencap': 'filename',
+                'seasonnum': 'episodenumber'
+            }
+
+            try:
+                key = name_map[key.lower()]
+            except (ValueError, TypeError, KeyError):
+                key.lower()
+
+            # clean up value and do type changes
+            if value:
+                if isinstance(value, dict):
+                    if key == 'network':
+                        value = value['#text']
+                    if key == 'genre':
+                        value = value['genre']
+                        if not isinstance(value, list):
+                            value = [value]
+                        value = '|' + '|'.join(value) + '|'
+
+                try:
+                    # convert to integer if needed
+                    if value.isdigit():
+                        value = int(value)
+                except:
+                    pass
+
+                try:
+                    if key == 'firstaired' and value in "0000-00-00":
+                        new_value = str(dt.date.fromordinal(1))
+                        new_value = re.sub("([-]0{2}){1,}", "", new_value)
+                        fixDate = parse(new_value, fuzzy=True).date()
+                        value = fixDate.strftime("%Y-%m-%d")
+                    elif key == 'firstaired':
+                        value = parse(value, fuzzy=True).date()
+                        value = value.strftime("%Y-%m-%d")
+                except:
+                    pass
+
+            value = self._cleanData(value)
+            return (key, value)
+
+        if resp.ok:
+            return xmltodict.parse(resp.text.strip(), postprocessor=remap_keys)

    def _getetsrc(self, url, params=None):
        """Loads a URL using caching, returns an ElementTree of the source
        """
-        reDict = {
-            'showid': 'id',
-            'showname': 'seriesname',
-            'name': 'seriesname',
-            'summary': 'overview',
-            'started': 'firstaired',
-            'genres': 'genre',
-            'airtime': 'airs_time',
-            'airday': 'airs_dayofweek',
-            'image': 'fanart',
-            'epnum': 'absolute_number',
-            'title': 'episodename',
-            'airdate': 'firstaired',
-            'screencap': 'filename',
-            'seasonnum': 'episodenumber',
-        }

-        robj = re.compile('|'.join(reDict.keys()))
-        src = self._loadUrl(url, params)
        try:
-            # TVRAGE doesn't sanitize \r (CR) from user input in some fields,
-            # remove it to avoid errors. Change from SickBeard, from will14m
-            xml = ElementTree.fromstring(src.rstrip("\r"))
-            tree = ElementTree.ElementTree(xml)
-            for elm in tree.findall('.//*'):
-                elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
-
-                if elm.tag in 'firstaired':
-                    try:
-                        if elm.text in "0000-00-00":
-                            elm.text = str(dt.date.fromordinal(1))
-                        elm.text = re.sub("([-]0{2}){1,}", "", elm.text)
-                        fixDate = parse(elm.text, fuzzy=True).date()
-                        elm.text = fixDate.strftime("%Y-%m-%d")
-                    except:
-                        pass
-            return ElementTree.fromstring(ElementTree.tostring(xml))
-        except SyntaxError:
            src = self._loadUrl(url, params)
-            try:
-                xml = ElementTree.fromstring(src.rstrip("\r"))
-                tree = ElementTree.ElementTree(xml)
-                for elm in tree.findall('.//*'):
-                    elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
+            src = [src[item] for item in src][0]
+        except:
+            errormsg = "There was an error with the XML retrieved from tvrage.com"

-                    if elm.tag in 'firstaired' and elm.text:
-                        if elm.text == "0000-00-00":
-                            elm.text = str(dt.date.fromordinal(1))
-                        try:
-                            #month = strptime(match.group('air_month')[:3],'%b').tm_mon
-                            #day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
-                            #dtStr = '%s/%s/%s' % (year, month, day)
-
-                            fixDate = parse(elm.text, fuzzy=True)
-                            elm.text = fixDate.strftime("%Y-%m-%d")
-                        except:
-                            pass
-                    return ElementTree.fromstring(ElementTree.tostring(xml))
-            except SyntaxError, exceptionmsg:
-                errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % (
-                    exceptionmsg
+            if self.config['cache_enabled']:
+                errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
+                    self.config['cache_location']
                )

-                if self.config['cache_enabled']:
-                    errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
-                        self.config['cache_location']
-                    )
+            errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
+            raise tvrage_error(errormsg)

-                errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
-                raise tvrage_error(errormsg)
+        return src

    def _setItem(self, sid, seas, ep, attrib, value):
        """Creates a new episode, creating Show(), Season() and
@ -497,9 +507,9 @@ class TVRage:
        if sid not in self.shows:
            self.shows[sid] = Show()
        if seas not in self.shows[sid]:
-            self.shows[sid][seas] = Season(show = self.shows[sid])
+            self.shows[sid][seas] = Season(show=self.shows[sid])
        if ep not in self.shows[sid][seas]:
-            self.shows[sid][seas][ep] = Episode(season = self.shows[sid][seas])
+            self.shows[sid][seas][ep] = Episode(season=self.shows[sid][seas])
        self.shows[sid][seas][ep][attrib] = value

    def _setShowData(self, sid, key, value):
@ -529,9 +539,8 @@ class TVRage:
        log().debug("Searching for show %s" % series)
        self.config['params_getSeries']['show'] = series
        seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
-        allSeries = list(dict((s.tag.lower(),s.text) for s in x.getchildren()) for x in seriesEt)

-        return allSeries
+        return [seriesEt[item] for item in seriesEt][0]

    def _getSeries(self, series):
        """This searches tvrage.com for the series name,
@ -547,10 +556,10 @@ class TVRage:

        if self.config['custom_ui'] is not None:
            log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
-            ui = self.config['custom_ui'](config = self.config)
+            ui = self.config['custom_ui'](config=self.config)
        else:
            log().debug('Auto-selecting first search result using BaseUI')
-            ui = BaseUI(config = self.config)
+            ui = BaseUI(config=self.config)

        return ui.selectSeries(allSeries)

@ -568,62 +577,49 @@ class TVRage:
            self.config['params_seriesInfo']
        )

-        if seriesInfoEt is None: return False
-        for curInfo in seriesInfoEt:
-            tag = curInfo.tag.lower()
-            value = curInfo.text
+        # check and make sure we have data to process and that it contains a series name
+        if seriesInfoEt is None or 'seriesname' not in seriesInfoEt:
+            return False

-            if tag == 'seriesname' and value is None:
-                return False
+        for k, v in seriesInfoEt.items():
+            self._setShowData(sid, k, v)

-            if tag == 'id':
-                value = int(value)
-
-            if value is not None:
-                value = self._cleanData(value)
-
-            self._setShowData(sid, tag, value)
-        if seriesSearch: return True
-
-        try:
-            # Parse genre data
-            log().debug('Getting genres of %s' % (sid))
-            for genre in seriesInfoEt.find('genres'):
-                tag = genre.tag.lower()
-
-                value = genre.text
-                if value is not None:
-                    value = self._cleanData(value)
-
-                self._setShowData(sid, tag, value)
-        except Exception:
-            log().debug('No genres for %s' % (sid))
+        # series search ends here
+        if seriesSearch:
+            return True

        # Parse episode data
        log().debug('Getting all episodes of %s' % (sid))

        self.config['params_epInfo']['sid'] = sid
        epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
-        for cur_list in epsEt.findall("Episodelist"):
-            for cur_seas in cur_list:
-                try:
-                    seas_no = int(cur_seas.attrib['no'])
-                    for cur_ep in cur_seas:
-                        ep_no = int(cur_ep.find('episodenumber').text)
-                        self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
-                        for cur_item in cur_ep:
-                            tag = cur_item.tag.lower()

-                            value = cur_item.text
-                            if value is not None:
-                                if tag == 'id':
-                                    value = int(value)
+        for season in epsEt['Episodelist']['Season']:
+            episodes =  season['episode']
+            if not isinstance(episodes, list):
+                episodes = [episodes]

-                                value = self._cleanData(value)
+            for episode in episodes:
+                seas_no = int(season['@no'])
+                ep_no = int(episode['episodenumber'])
+                self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)

-                            self._setItem(sid, seas_no, ep_no, tag, value)
-                except:
-                    continue
+                for k,v in episode.items():
+                    try:
+                        k = k.lower()
+                        if v is not None:
+                            if k == 'link':
+                                v = v.rsplit('/', 1)[1]
+                                k = 'id'
+
+                            if k == 'id':
+                                v = int(v)
+
+                            v = self._cleanData(v)
+
+                        self._setItem(sid, seas_no, ep_no, k, v)
+                    except:
+                        continue
        return True

    def _nameToSid(self, name):
@ -632,7 +628,7 @@ class TVRage:
        the correct SID.
        """
        if name in self.corrections:
-            log().debug('Correcting %s to %s' % (name, self.corrections[name]) )
+            log().debug('Correcting %s to %s' % (name, self.corrections[name]))
            return self.corrections[name]
        else:
            log().debug('Getting show %s' % (name))
@ -673,11 +669,13 @@ def main():
    grabs an episode name interactively.
    """
    import logging
+
    logging.basicConfig(level=logging.DEBUG)

    tvrage_instance = TVRage(cache=False)
    print tvrage_instance['Lost']['seriesname']
    print tvrage_instance['Lost'][1][4]['episodename']

+
 if __name__ == '__main__':
    main()
--- a/lib/xmltodict.py
+++ b/lib/xmltodict.py
@ -0,0 +1,359 @@
+#!/usr/bin/env python
+"Makes working with XML feel like you are working with JSON"
+
+from xml.parsers import expat
+from xml.sax.saxutils import XMLGenerator
+from xml.sax.xmlreader import AttributesImpl
+try:  # pragma no cover
+    from cStringIO import StringIO
+except ImportError:  # pragma no cover
+    try:
+        from StringIO import StringIO
+    except ImportError:
+        from io import StringIO
+try:  # pragma no cover
+    from collections import OrderedDict
+except ImportError:  # pragma no cover
+    try:
+        from ordereddict import OrderedDict
+    except ImportError:
+        OrderedDict = dict
+
+try:  # pragma no cover
+    _basestring = basestring
+except NameError:  # pragma no cover
+    _basestring = str
+try:  # pragma no cover
+    _unicode = unicode
+except NameError:  # pragma no cover
+    _unicode = str
+
+__author__ = 'Martin Blech'
+__version__ = '0.9.0'
+__license__ = 'MIT'
+
+
+class ParsingInterrupted(Exception):
+    pass
+
+
+class _DictSAXHandler(object):
+    def __init__(self,
+                 item_depth=0,
+                 item_callback=lambda *args: True,
+                 xml_attribs=True,
+                 attr_prefix='@',
+                 cdata_key='#text',
+                 force_cdata=False,
+                 cdata_separator='',
+                 postprocessor=None,
+                 dict_constructor=OrderedDict,
+                 strip_whitespace=True,
+                 namespace_separator=':',
+                 namespaces=None):
+        self.path = []
+        self.stack = []
+        self.data = None
+        self.item = None
+        self.item_depth = item_depth
+        self.xml_attribs = xml_attribs
+        self.item_callback = item_callback
+        self.attr_prefix = attr_prefix
+        self.cdata_key = cdata_key
+        self.force_cdata = force_cdata
+        self.cdata_separator = cdata_separator
+        self.postprocessor = postprocessor
+        self.dict_constructor = dict_constructor
+        self.strip_whitespace = strip_whitespace
+        self.namespace_separator = namespace_separator
+        self.namespaces = namespaces
+
+    def _build_name(self, full_name):
+        if not self.namespaces:
+            return full_name
+        i = full_name.rfind(self.namespace_separator)
+        if i == -1:
+            return full_name
+        namespace, name = full_name[:i], full_name[i+1:]
+        short_namespace = self.namespaces.get(namespace, namespace)
+        if not short_namespace:
+            return name
+        else:
+            return self.namespace_separator.join((short_namespace, name))
+
+    def _attrs_to_dict(self, attrs):
+        if isinstance(attrs, dict):
+            return attrs
+        return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))
+
+    def startElement(self, full_name, attrs):
+        name = self._build_name(full_name)
+        attrs = self._attrs_to_dict(attrs)
+        self.path.append((name, attrs or None))
+        if len(self.path) > self.item_depth:
+            self.stack.append((self.item, self.data))
+            if self.xml_attribs:
+                attrs = self.dict_constructor(
+                    (self.attr_prefix+key, value)
+                    for (key, value) in attrs.items())
+            else:
+                attrs = None
+            self.item = attrs or None
+            self.data = None
+
+    def endElement(self, full_name):
+        name = self._build_name(full_name)
+        if len(self.path) == self.item_depth:
+            item = self.item
+            if item is None:
+                item = self.data
+            should_continue = self.item_callback(self.path, item)
+            if not should_continue:
+                raise ParsingInterrupted()
+        if len(self.stack):
+            item, data = self.item, self.data
+            self.item, self.data = self.stack.pop()
+            if self.strip_whitespace and data is not None:
+                data = data.strip() or None
+            if data and self.force_cdata and item is None:
+                item = self.dict_constructor()
+            if item is not None:
+                if data:
+                    self.push_data(item, self.cdata_key, data)
+                self.item = self.push_data(self.item, name, item)
+            else:
+                self.item = self.push_data(self.item, name, data)
+        else:
+            self.item = self.data = None
+        self.path.pop()
+
+    def characters(self, data):
+        if not self.data:
+            self.data = data
+        else:
+            self.data += self.cdata_separator + data
+
+    def push_data(self, item, key, data):
+        if self.postprocessor is not None:
+            result = self.postprocessor(self.path, key, data)
+            if result is None:
+                return item
+            key, data = result
+        if item is None:
+            item = self.dict_constructor()
+        try:
+            value = item[key]
+            if isinstance(value, list):
+                value.append(data)
+            else:
+                item[key] = [value, data]
+        except KeyError:
+            item[key] = data
+        return item
+
+
+def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
+          namespace_separator=':', **kwargs):
+    """Parse the given XML input and convert it into a dictionary.
+
+    `xml_input` can either be a `string` or a file-like object.
+
+    If `xml_attribs` is `True`, element attributes are put in the dictionary
+    among regular child elements, using `@` as a prefix to avoid collisions. If
+    set to `False`, they are just ignored.
+
+    Simple example::
+
+        >>> import xmltodict
+        >>> doc = xmltodict.parse(\"\"\"
+        ... <a prop="x">
+        ...   <b>1</b>
+        ...   <b>2</b>
+        ... </a>
+        ... \"\"\")
+        >>> doc['a']['@prop']
+        u'x'
+        >>> doc['a']['b']
+        [u'1', u'2']
+
+    If `item_depth` is `0`, the function returns a dictionary for the root
+    element (default behavior). Otherwise, it calls `item_callback` every time
+    an item at the specified depth is found and returns `None` in the end
+    (streaming mode).
+
+    The callback function receives two parameters: the `path` from the document
+    root to the item (name-attribs pairs), and the `item` (dict). If the
+    callback's return value is false-ish, parsing will be stopped with the
+    :class:`ParsingInterrupted` exception.
+
+    Streaming example::
+
+        >>> def handle(path, item):
+        ...     print 'path:%s item:%s' % (path, item)
+        ...     return True
+        ...
+        >>> xmltodict.parse(\"\"\"
+        ... <a prop="x">
+        ...   <b>1</b>
+        ...   <b>2</b>
+        ... </a>\"\"\", item_depth=2, item_callback=handle)
+        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
+        path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
+
+    The optional argument `postprocessor` is a function that takes `path`,
+    `key` and `value` as positional arguments and returns a new `(key, value)`
+    pair where both `key` and `value` may have changed. Usage example::
+
+        >>> def postprocessor(path, key, value):
+        ...     try:
+        ...         return key + ':int', int(value)
+        ...     except (ValueError, TypeError):
+        ...         return key, value
+        >>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
+        ...                 postprocessor=postprocessor)
+        OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
+
+    You can pass an alternate version of `expat` (such as `defusedexpat`) by
+    using the `expat` parameter. E.g:
+
+        >>> import defusedexpat
+        >>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
+        OrderedDict([(u'a', u'hello')])
+
+    """
+    # All remaining keyword arguments are forwarded to the SAX-style handler.
+    # The handler always receives the caller's `namespace_separator`, even
+    # when the parser below is created without one.
+    handler = _DictSAXHandler(namespace_separator=namespace_separator,
+                              **kwargs)
+    # Unicode input is encoded to bytes before parsing; 'utf-8' is used when
+    # no encoding was given.
+    if isinstance(xml_input, _unicode):
+        if not encoding:
+            encoding = 'utf-8'
+        xml_input = xml_input.encode(encoding)
+    # Without namespace processing the parser is created with no separator.
+    if not process_namespaces:
+        namespace_separator = None
+    parser = expat.ParserCreate(
+        encoding,
+        namespace_separator
+    )
+    try:
+        parser.ordered_attributes = True
+    except AttributeError:
+        # Jython's expat does not support ordered_attributes
+        pass
+    parser.StartElementHandler = handler.startElement
+    parser.EndElementHandler = handler.endElement
+    parser.CharacterDataHandler = handler.characters
+    parser.buffer_text = True
+    # Try the input as a file-like object first; if that raises TypeError or
+    # AttributeError, fall back to parsing it as one complete string.
+    # NOTE(review): a TypeError/AttributeError raised while ParseFile is
+    # running is also caught here and triggers the fallback -- confirm that
+    # is acceptable.
+    try:
+        parser.ParseFile(xml_input)
+    except (TypeError, AttributeError):
+        parser.Parse(xml_input, True)
+    return handler.item
+
+
+def _emit(key, value, content_handler,
+          attr_prefix='@',
+          cdata_key='#text',
+          depth=0,
+          preprocessor=None,
+          pretty=False,
+          newl='\n',
+          indent='\t'):
+    """Recursively emit SAX events on `content_handler` for `key`/`value`.
+
+    `value` may be a single item or a list/tuple of items; one element named
+    `key` is emitted per item.  Inside a dict item, the entry equal to
+    `cdata_key` becomes character data, entries whose key starts with
+    `attr_prefix` become attributes (prefix stripped), and all other entries
+    are emitted as child elements at `depth + 1`.
+
+    If `preprocessor` is given it is called with `(key, value)` first; a
+    `None` result skips this entry, otherwise its result replaces the pair.
+
+    Raises `ValueError` when more than one item is supplied at depth 0.
+    """
+    if preprocessor is not None:
+        result = preprocessor(key, value)
+        if result is None:
+            return
+        key, value = result
+    # Normalise to a list so single and repeated elements share one loop.
+    if not isinstance(value, (list, tuple)):
+        value = [value]
+    if depth == 0 and len(value) > 1:
+        raise ValueError('document with multiple roots')
+    for v in value:
+        # None -> empty element; any non-dict -> its unicode text, which is
+        # then wrapped as a dict holding only character data.
+        if v is None:
+            v = OrderedDict()
+        elif not isinstance(v, dict):
+            v = _unicode(v)
+        if isinstance(v, _basestring):
+            v = OrderedDict(((cdata_key, v),))
+        cdata = None
+        attrs = OrderedDict()
+        children = []
+        # Split the dict into character data, attributes and child elements.
+        # NOTE(review): `ik.startswith` assumes every key is a string.
+        for ik, iv in v.items():
+            if ik == cdata_key:
+                cdata = iv
+                continue
+            if ik.startswith(attr_prefix):
+                attrs[ik[len(attr_prefix):]] = iv
+                continue
+            children.append((ik, iv))
+        if pretty:
+            content_handler.ignorableWhitespace(depth * indent)
+        content_handler.startElement(key, AttributesImpl(attrs))
+        if pretty and children:
+            content_handler.ignorableWhitespace(newl)
+        for child_key, child_value in children:
+            _emit(child_key, child_value, content_handler,
+                  attr_prefix, cdata_key, depth+1, preprocessor,
+                  pretty, newl, indent)
+        # Character data is written after all child elements.
+        if cdata is not None:
+            content_handler.characters(cdata)
+        if pretty and children:
+            content_handler.ignorableWhitespace(depth * indent)
+        content_handler.endElement(key)
+        # No trailing newline is written after the root element (depth 0).
+        if pretty and depth:
+            content_handler.ignorableWhitespace(newl)
+
+
+def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
+            **kwargs):
+    """Emit an XML document for the given `input_dict` (reverse of `parse`).
+
+    The resulting XML document is returned as a string, but if `output` (a
+    file-like object) is specified, it is written there instead.
+
+    Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
+    as XML node attributes, whereas keys equal to `cdata_key`
+    (default=`'#text'`) are treated as character data.
+
+    The `pretty` parameter (default=`False`) enables pretty-printing. In this
+    mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
+    can be customized with the `newl` and `indent` parameters.
+
+    """
+    # `input_dict` must hold exactly one (root) entry; any other size makes
+    # this unpacking raise ValueError.
+    ((key, value),) = input_dict.items()
+    must_return = False
+    if output is None:
+        # No destination given: collect the document in memory and return it.
+        output = StringIO()
+        must_return = True
+    content_handler = XMLGenerator(output, encoding)
+    # startDocument()/endDocument() are only called for a full document.
+    if full_document:
+        content_handler.startDocument()
+    # Extra keyword arguments are passed straight through to _emit.
+    _emit(key, value, content_handler, **kwargs)
+    if full_document:
+        content_handler.endDocument()
+    # When the caller supplied `output`, nothing is returned (None).
+    if must_return:
+        value = output.getvalue()
+        # Decode the buffer when it has a `decode` method; otherwise it is
+        # returned unchanged.
+        try:  # pragma no cover
+            value = value.decode(encoding)
+        except AttributeError:  # pragma no cover
+            pass
+        return value
+
+# Command-line entry point: takes one argument (the item depth), parses XML
+# from stdin and marshals each (path, item) pair to stdout.
+if __name__ == '__main__':  # pragma: no cover
+    import sys
+    import marshal
+
+    # Exactly one command-line argument is required; any other count makes
+    # this unpacking raise ValueError.
+    (item_depth,) = sys.argv[1:]
+    item_depth = int(item_depth)
+
+    def handle_item(path, item):
+        # Serialise the pair to stdout; returning True keeps parsing going.
+        marshal.dump((path, item), sys.stdout)
+        return True
+
+    try:
+        root = parse(sys.stdin,
+                     item_depth=item_depth,
+                     item_callback=handle_item,
+                     dict_constructor=dict)
+        # At depth 0 the callback is not relied on: the returned root is
+        # dumped once, with an empty path.
+        if item_depth == 0:
+            handle_item([], root)
+    except KeyboardInterrupt:
+        # An interrupt ends the run quietly.
+        pass
--- a/sickbeard/metadata/generic.py
+++ b/sickbeard/metadata/generic.py
@ -782,14 +782,10 @@ class GenericMetadata():

        # Try and get posters and fanart from TMDB
        if image_url is None:
-            for show_name in set(allPossibleShowNames(show_obj)):
-                if image_type in ('poster', 'poster_thumb'):
-                    image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
-                elif image_type == 'fanart':
-                    image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)
-
-                if image_url:
-                    break
+            if image_type in ('poster', 'poster_thumb'):
+                image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
+            elif image_type == 'fanart':
+                image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)

        if image_url:
            image_data = metadata_helpers.getShowImage(image_url, which)
@ -965,8 +961,6 @@ class GenericMetadata():
        return (indexer_id, name, indexer)

    def _retrieve_show_images_from_tmdb(self, show, backdrop=False, poster=False):
-        tmdb_id = None
-
        # get TMDB configuration info
        tmdb = TMDB(sickbeard.TMDB_API_KEY)
        config = tmdb.Configuration()
@ -981,27 +975,14 @@ class GenericMetadata():

        try:
            search = tmdb.Search()
-            for result in search.collection({'query': show.name}) + search.tv({'query': show.name}):
-                tmdb_id = result['id']
-                external_ids = tmdb.TV(tmdb_id).external_ids()
-                if show.indexerid in [external_ids['tvdb_id'], external_ids['tvrage_id']]:
-                    break
+            for show_name in set(allPossibleShowNames(show)):
+                for result in search.collection({'query': show_name})['results'] + search.tv({'query': show_name})['results']:
+                    if backdrop and result['backdrop_path']:
+                        return "{0}{1}{2}".format(base_url, max_size, result['backdrop_path'])
+                    elif poster and result['poster_path']:
+                        return "{0}{1}{2}".format(base_url, max_size, result['poster_path'])

-            if tmdb_id:
-                images = tmdb.Collections(tmdb_id).images()
-                if len(images) > 0:
-                    # get backdrop urls
-                    if backdrop:
-                        rel_path = images['backdrops'][0]['file_path']
-                        url = "{0}{1}{2}".format(base_url, max_size, rel_path)
-                        return url
-
-                    # get poster urls
-                    if poster:
-                        rel_path = images['posters'][0]['file_path']
-                        url = "{0}{1}{2}".format(base_url, max_size, rel_path)
-                        return url
-        except:
+        except Exception, e:
            pass

        logger.log(u"Could not find any posters or background for " + show.name, logger.DEBUG)
--- a/sickbeard/tv.py
+++ b/sickbeard/tv.py
@ -829,7 +829,7 @@ class TVShow(object):
            self.airs = myEp["airs_dayofweek"] + " " + myEp["airs_time"]

        if getattr(myEp, 'firstaired', None) is not None:
-            self.startyear = int(myEp["firstaired"].split('-')[0])
+            self.startyear = int(str(myEp["firstaired"]).split('-')[0])

        self.status = getattr(myEp, 'status', '')

@ -855,7 +855,6 @@ class TVShow(object):
            i = imdb.IMDb()
            imdbTv = i.get_movie(str(re.sub("[^0-9]", "", self.imdbid)))

-            test = imdbTv.keys()
            for key in filter(lambda x: x.replace('_', ' ') in imdbTv.keys(), imdb_info.keys()):
                # Store only the first value for string type
                if type(imdb_info[key]) == type('') and type(imdbTv.get(key)) == type([]):
@ -1556,7 +1555,7 @@ class TVEpisode(object):
                self.deleteEpisode()
            return False

-        if myEp["absolute_number"] == None or myEp["absolute_number"] == "":
+        if getattr(myEp, 'absolute_number', None) is None:
            logger.log(u"This episode (" + self.show.name + " - " + str(season) + "x" + str(
                episode) + ") has no absolute number on " + sickbeard.indexerApi(
                self.indexer).name
@ -1564,7 +1563,7 @@ class TVEpisode(object):
        else:
            logger.log(
                str(self.show.indexerid) + ": The absolute_number for " + str(season) + "x" + str(episode) + " is : " +
-                myEp["absolute_number"], logger.DEBUG)
+                str(myEp["absolute_number"]), logger.DEBUG)
            self.absolute_number = int(myEp["absolute_number"])

        self.name = getattr(myEp, 'episodename', "")
@ -1603,8 +1602,9 @@ class TVEpisode(object):
                u"The show dir is missing, not bothering to change the episode statuses since it'd probably be invalid")
            return

-        logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
-            episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)
+        if self.location:
+            logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
+                episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)

        if not ek.ek(os.path.isfile, self.location):