SickRage/lib/subliminal/services/tvsubtitles.py

# -*- coding: utf-8 -*-
# Copyright 2012 Nicolas Wack <wackou@gmail.com>
#
# This file is part of subliminal.
#
# subliminal is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# subliminal is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with subliminal.  If not, see <http://www.gnu.org/licenses/>.
from . import ServiceBase
from ..cache import cachedmethod
from ..language import language_set, Language
from ..subtitles import get_subtitle_path, ResultSubtitle
from ..utils import get_keywords
from ..videos import Episode
from bs4 import BeautifulSoup
import logging
import re


# Module logger; it uses the fixed name "subliminal" rather than __name__.
logger = logging.getLogger("subliminal")


def match(pattern, string):
    """Return the first capture group of *pattern* found in *string*.

    The search is done with ``re.search``, so the pattern may match anywhere
    in *string*. When nothing matches, a debug message is logged and ``None``
    is returned.
    """
    found = re.search(pattern, string)
    if found is None:
        logger.debug(u'Could not match %r on %r' % (pattern, string))
        return None
    return found.group(1)


class TvSubtitles(ServiceBase):
    """Subtitle service that scrapes the HTML pages of tvsubtitles.net.

    The service is not API based: series, episode and subtitle ids are all
    extracted from the links found in the search, season and episode pages.
    """
    server_url = 'http://www.tvsubtitles.net'
    site_url = 'http://www.tvsubtitles.net'
    api_based = False
    languages = language_set(['ar', 'bg', 'cs', 'da', 'de', 'el', 'en', 'es', 'fi', 'fr', 'hu',
                              'it', 'ja', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'tr', 'uk',
                              'zh', 'pb'])
    #TODO: Find more exceptions
    # Site-specific codes mapped to Language objects; presumably consulted by
    # the inherited get_language() -- confirm against ServiceBase.
    language_map = {'gr': Language('gre'), 'cz': Language('cze'), 'ua': Language('ukr'),
                    'cn': Language('chi'), 'br': Language('pob')}
    videos = [Episode]
    require_video = False
    # Handed to BeautifulSoup as its parser "features" argument.
    required_features = ['permissive']

    @cachedmethod
    def get_likely_series_id(self, name):
        """Search the site for *name* and return the id of the first show listed.

        List items without a link to a ``tvshow-<id>.html`` page are ignored.

        Raises IndexError if the search page lists no show at all.
        """
        r = self.session.post('%s/search.php' % self.server_url, data={'q': name})
        soup = BeautifulSoup(r.content, self.required_features)
        maindiv = soup.find('div', 'left')
        # a page without the results container is treated as "no result"
        items = maindiv.find_all('li') if maindiv is not None else []
        results = []
        for elem in items:
            link = elem.a
            if link is None or not link.get('href'):
                continue
            sid = match(r'tvshow-([0-9]+)\.html', link['href'])
            if sid is None:
                # not a link to a show page
                continue
            show_name = match(r'(.*) \(', link.text)
            results.append((show_name, int(sid)))
        if not results:
            raise IndexError('No series found for %r' % (name,))
        #TODO: pick up the best one in a smart way
        return results[0][1]

    @cachedmethod
    def get_episode_id(self, series_id, season, number):
        """Get the TvSubtitles id for the given episode. Raises KeyError if none
        could be found."""
        # download the page of the season, contains ids for all episodes
        r = self.session.get('%s/tvshow-%d-%d.html' % (self.server_url, series_id, season))
        soup = BeautifulSoup(r.content, self.required_features)
        table = soup.find('table', id='table5')
        # without the episode table nothing gets cached and the final lookup
        # reports the episode as not found
        rows = table.find_all('tr') if table is not None else []
        for row in rows:
            cells = row.find_all('td')
            # the number is read from the first cell, the link from the second
            if len(cells) < 2:
                continue
            episode_number = match('x([0-9]+)', cells[0].text)
            if not episode_number:
                continue
            link = cells[1].a
            if link is None or not link.get('href'):
                continue
            episode_id = match('episode-([0-9]+)', link['href'])
            if episode_id is None:
                continue
            # we could just return the id of the queried episode, but as we
            # already downloaded the whole page we might as well fill in the
            # information for all the episodes of the season
            self.cache_for(self.get_episode_id, args=(series_id, season, int(episode_number)),
                           result=int(episode_id))
        # raises KeyError if not found
        return self.cached_value(self.get_episode_id, args=(series_id, season, number))

    # Do not cache this method in order to always check for the most recent
    # subtitles
    def get_sub_ids(self, episode_id):
        """Return a list of dicts describing the subtitles of an episode.

        Every dict has the keys ``'subid'`` (int) and ``'language'``; the keys
        ``'rip'`` and ``'release'`` are only present when the page provides
        them. Links whose id or language flag cannot be read are skipped.
        """
        subids = []
        r = self.session.get('%s/episode-%d.html' % (self.server_url, episode_id))
        epsoup = BeautifulSoup(r.content, self.required_features)
        for subdiv in epsoup.find_all('a'):
            if 'href' not in subdiv.attrs or not subdiv['href'].startswith('/subtitle'):
                continue
            subid = match('([0-9]+)', subdiv['href'])
            flag = subdiv.img
            if subid is None or flag is None or not flag.get('src'):
                continue
            # the language is encoded in the file name of the flag image
            flag_code = match(r'flags/(.*)\.gif', flag['src'])
            if flag_code is None:
                continue
            result = {'subid': int(subid), 'language': self.get_language(flag_code)}
            for p in subdiv.find_all('p'):
                alt = p.attrs.get('alt')
                if alt in ('rip', 'release'):
                    result[alt] = p.text.strip()
            subids.append(result)
        return subids

    def list_checked(self, video, languages):
        """Run :meth:`query` with the path (or release), keywords, series,
        season and episode taken from *video*."""
        return self.query(video.path or video.release, languages, get_keywords(video.guess), video.series, video.season, video.episode)

    def query(self, filepath, languages, keywords, series, season, episode):
        """Return the subtitles available for an episode in the given languages.

        :param filepath: video path used to build the subtitle paths
        :param languages: languages to keep; other subtitles are dropped
        :param keywords: accepted for interface compatibility, not used here
        :param series: name of the series, searched in lower case
        :param season: season number
        :param episode: episode number
        :return: list of ResultSubtitle, empty when the series or the episode
                 cannot be found on the site
        """
        logger.debug(u'Getting subtitles for %s season %d episode %d with languages %r', series, season, episode, languages)
        self.init_cache()
        try:
            sid = self.get_likely_series_id(series.lower())
        except IndexError:
            # handled like an unknown episode: nothing to offer
            logger.debug(u'Could not find series id for %s', series)
            return []
        try:
            ep_id = self.get_episode_id(sid, season, episode)
        except KeyError:
            logger.debug(u'Could not find episode id for %s season %d episode %d', series, season, episode)
            return []
        subids = self.get_sub_ids(ep_id)
        # filter the subtitles with our queried languages
        subtitles = []
        for subid in subids:
            language = subid['language']
            if language not in languages:
                continue
            path = get_subtitle_path(filepath, language, self.config.multi)
            # 'rip' and 'release' are optional, only pass the ones we have
            sub_keywords = [subid[key] for key in ('rip', 'release') if key in subid]
            subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), '%s/download-%d.html' % (self.server_url, subid['subid']),
                                      keywords=sub_keywords)
            subtitles.append(subtitle)
        return subtitles

    def download(self, subtitle):
        """Pass the subtitle's link and path to ``download_zip_file`` and
        return *subtitle*."""
        self.download_zip_file(subtitle.link, subtitle.path)
        return subtitle


# Module-level alias for the service class; presumably the name the service
# loader looks up in each service module -- confirm against the loader.
Service = TvSubtitles
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search and gather it's series data from allowing you to now have access to and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. Also this edition is based off the code we used in our XEM editon so it does come with scene numbering support as well as all the other features our XEM edition has to offer. Please before using this with your existing database (sickbeard.db) please make a backup copy of it and delete any other database files such as cache.db and failed.db if present, we HIGHLY recommend starting out with no database files at all to make this a fresh start but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`# -- coding: utf-8 --`
			`# Copyright 2012 Nicolas Wack <wackou@gmail.com>`
			`#`
			`# This file is part of subliminal.`
			`#`
			`# subliminal is free software; you can redistribute it and/or modify it under`
			`# the terms of the GNU Lesser General Public License as published by`
			`# the Free Software Foundation; either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# subliminal is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU Lesser General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU Lesser General Public License`
			`# along with subliminal. If not, see <http://www.gnu.org/licenses/>.`
			`from . import ServiceBase`
			`from ..cache import cachedmethod`
			`from ..language import language_set, Language`
			`from ..subtitles import get_subtitle_path, ResultSubtitle`
			`from ..utils import get_keywords`
			`from ..videos import Episode`
			`from bs4 import BeautifulSoup`
			`import logging`
			`import re`


Fixed subliminal issues. Added ignore/required words option to bet set individually for each show. Fixed issue with global ignore words not properly matching against releases. Fixed issue with 2014-04-24 01:18:16 -04:00			`logger = logging.getLogger("subliminal")`
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search and gather its series data, allowing you to access and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. This edition is also based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00

			`def match(pattern, string):`
			`try:`
			`return re.search(pattern, string).group(1)`
			`except AttributeError:`
			`logger.debug(u'Could not match %r on %r' % (pattern, string))`
			`return None`


			`class TvSubtitles(ServiceBase):`
			`server_url = 'http://www.tvsubtitles.net'`
Fixed subliminal issues. Added ignore/required words option to bet set individually for each show. Fixed issue with global ignore words not properly matching against releases. Fixed issue with 2014-04-24 01:18:16 -04:00			`site_url = 'http://www.tvsubtitles.net'`
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search and gather it's series data from allowing you to now have access to and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. Also this edition is based off the code we used in our XEM editon so it does come with scene numbering support as well as all the other features our XEM edition has to offer. Please before using this with your existing database (sickbeard.db) please make a backup copy of it and delete any other database files such as cache.db and failed.db if present, we HIGHLY recommend starting out with no database files at all to make this a fresh start but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`api_based = False`
			`languages = language_set(['ar', 'bg', 'cs', 'da', 'de', 'el', 'en', 'es', 'fi', 'fr', 'hu',`
			`'it', 'ja', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'tr', 'uk',`
Fixed subliminal issues. Added ignore/required words option to bet set individually for each show. Fixed issue with global ignore words not properly matching against releases. Fixed issue with 2014-04-24 01:18:16 -04:00			`'zh', 'pb'])`
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search and gather it's series data from allowing you to now have access to and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. Also this edition is based off the code we used in our XEM editon so it does come with scene numbering support as well as all the other features our XEM edition has to offer. Please before using this with your existing database (sickbeard.db) please make a backup copy of it and delete any other database files such as cache.db and failed.db if present, we HIGHLY recommend starting out with no database files at all to make this a fresh start but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`#TODO: Find more exceptions`
			`language_map = {'gr': Language('gre'), 'cz': Language('cze'), 'ua': Language('ukr'),`
Fixed subliminal issues. Added ignore/required words option to bet set individually for each show. Fixed issue with global ignore words not properly matching against releases. Fixed issue with 2014-04-24 01:18:16 -04:00			`'cn': Language('chi'), 'br': Language('pob')}`
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search and gather it's series data from allowing you to now have access to and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. Also this edition is based off the code we used in our XEM editon so it does come with scene numbering support as well as all the other features our XEM edition has to offer. Please before using this with your existing database (sickbeard.db) please make a backup copy of it and delete any other database files such as cache.db and failed.db if present, we HIGHLY recommend starting out with no database files at all to make this a fresh start but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`videos = [Episode]`
			`require_video = False`
			`required_features = ['permissive']`

			`@cachedmethod`
			`def get_likely_series_id(self, name):`
			`r = self.session.post('%s/search.php' % self.server_url, data={'q': name})`
			`soup = BeautifulSoup(r.content, self.required_features)`
			`maindiv = soup.find('div', 'left')`
			`results = []`
			`for elem in maindiv.find_all('li'):`
			`sid = int(match('tvshow-([0-9]+)\.html', elem.a['href']))`
			`show_name = match('(.*) \(', elem.a.text)`
			`results.append((show_name, sid))`
			`#TODO: pick up the best one in a smart way`
			`result = results[0]`
			`return result[1]`

			`@cachedmethod`
			`def get_episode_id(self, series_id, season, number):`
			`"""Get the TvSubtitles id for the given episode. Raises KeyError if none`
			`could be found."""`
			`# download the page of the season, contains ids for all episodes`
			`episode_id = None`
			`r = self.session.get('%s/tvshow-%d-%d.html' % (self.server_url, series_id, season))`
			`soup = BeautifulSoup(r.content, self.required_features)`
			`table = soup.find('table', id='table5')`
			`for row in table.find_all('tr'):`
			`cells = row.find_all('td')`
			`if not cells:`
			`continue`
			`episode_number = match('x([0-9]+)', cells[0].text)`
			`if not episode_number:`
			`continue`
			`episode_number = int(episode_number)`
			`episode_id = int(match('episode-([0-9]+)', cells[1].a['href']))`
			`# we could just return the id of the queried episode, but as we`
			`# already downloaded the whole page we might as well fill in the`
			`# information for all the episodes of the season`
			`self.cache_for(self.get_episode_id, args=(series_id, season, episode_number), result=episode_id)`
			`# raises KeyError if not found`
			`return self.cached_value(self.get_episode_id, args=(series_id, season, number))`

			`# Do not cache this method in order to always check for the most recent`
			`# subtitles`
			`def get_sub_ids(self, episode_id):`
			`subids = []`
			`r = self.session.get('%s/episode-%d.html' % (self.server_url, episode_id))`
			`epsoup = BeautifulSoup(r.content, self.required_features)`
			`for subdiv in epsoup.find_all('a'):`
			`if 'href' not in subdiv.attrs or not subdiv['href'].startswith('/subtitle'):`
			`continue`
			`subid = int(match('([0-9]+)', subdiv['href']))`
			`lang = self.get_language(match('flags/(.*).gif', subdiv.img['src']))`
			`result = {'subid': subid, 'language': lang}`
			`for p in subdiv.find_all('p'):`
			`if 'alt' in p.attrs and p['alt'] == 'rip':`
			`result['rip'] = p.text.strip()`
			`if 'alt' in p.attrs and p['alt'] == 'release':`
			`result['release'] = p.text.strip()`
			`subids.append(result)`
			`return subids`

			`def list_checked(self, video, languages):`
			`return self.query(video.path or video.release, languages, get_keywords(video.guess), video.series, video.season, video.episode)`

			`def query(self, filepath, languages, keywords, series, season, episode):`
			`logger.debug(u'Getting subtitles for %s season %d episode %d with languages %r' % (series, season, episode, languages))`
			`self.init_cache()`
			`sid = self.get_likely_series_id(series.lower())`
			`try:`
			`ep_id = self.get_episode_id(sid, season, episode)`
			`except KeyError:`
			`logger.debug(u'Could not find episode id for %s season %d episode %d' % (series, season, episode))`
			`return []`
			`subids = self.get_sub_ids(ep_id)`
			`# filter the subtitles with our queried languages`
			`subtitles = []`
			`for subid in subids:`
			`language = subid['language']`
			`if language not in languages:`
			`continue`
			`path = get_subtitle_path(filepath, language, self.config.multi)`
			`subtitle = ResultSubtitle(path, language, self.__class__.__name__.lower(), '%s/download-%d.html' % (self.server_url, subid['subid']),`
			`keywords=[subid['rip'], subid['release']])`
			`subtitles.append(subtitle)`
			`return subtitles`

			`def download(self, subtitle):`
			`self.download_zip_file(subtitle.link, subtitle.path)`
			`return subtitle`


			`Service = TvSubtitles`