SickRage/lib/guessit/transfo/guess_episode_info_from_position.py

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import unicode_literals
from guessit.transfo import found_property
from guessit.patterns import non_episode_title, unlikely_series
import logging

log = logging.getLogger(__name__)


def match_from_epnum_position(mtree, node):
    """Tag 'series' and 'title' leaves based on their position around `node`.

    `node` is the leaf carrying the episode number; its `node_idx` is the
    reference point. Only leaves returned by `mtree.unidentified_leaves()`
    are considered, and a leaf whose lowercased `clean_value` is listed in
    `non_episode_title` is never used as a title (nor as a candidate in
    rule 1 below).

    Rules, tried in order:

    1. No 'title' in `mtree.info`, nothing before the episode number in its
       path group and exactly two candidates after it: the first becomes the
       series and the second the title (both 0.4), then return.
    2. Otherwise the first leaf before the episode number in the same path
       group, if any, becomes the series (0.7).
    3. Exactly one candidate after it in the same path group: title (0.5).
    4. Else the first candidate after it in the same explicit group: title,
       0.4 when it is the only one, 0.3 when there are several.
    5. Else the candidate after it in the same path group with the longest
       `clean_value` (first one on ties): title (0.3).

    Results are recorded through `found_property`; nothing is returned.
    """
    epnum_idx = node.node_idx

    def unidentified_where(idx_matches):
        # unidentified leaves whose node index satisfies `idx_matches`
        return [leaf for leaf in mtree.unidentified_leaves()
                if idx_matches(leaf.node_idx)]

    # same first index component as the episode number (same path group),
    # remaining components strictly smaller
    def leaves_before_in_pathgroup():
        return unidentified_where(
            lambda idx: (idx[0] == epnum_idx[0] and
                         idx[1:] < epnum_idx[1:]))

    # same path group, remaining components strictly greater
    def leaves_after_in_pathgroup():
        return unidentified_where(
            lambda idx: (idx[0] == epnum_idx[0] and
                         idx[1:] > epnum_idx[1:]))

    # same first two index components (same explicit group), remaining
    # components strictly greater
    def leaves_after_in_explicitgroup():
        return unidentified_where(
            lambda idx: (idx[:2] == epnum_idx[:2] and
                         idx[2:] > epnum_idx[2:]))

    def usable_as_title(leaves):
        # drop leaves whose text is a known non-title word
        return [leaf for leaf in leaves
                if leaf.clean_value.lower() not in non_episode_title]

    # rule 1: "<epnum> <series> <title>" layout
    following = usable_as_title(leaves_after_in_pathgroup())
    if ('title' not in mtree.info and
            not leaves_before_in_pathgroup() and
            len(following) == 2):
        series_leaf, title_leaf = following
        found_property(series_leaf, 'series', confidence=0.4)
        found_property(title_leaf, 'title', confidence=0.4)
        return

    # rule 2: whatever comes first before the episode number is most likely
    # the series name
    preceding = leaves_before_in_pathgroup()
    if preceding:
        found_property(preceding[0], 'series', confidence=0.7)

    # rule 3: a single group after the episode number is probably the title
    following = usable_as_title(leaves_after_in_pathgroup())
    if len(following) == 1:
        found_property(following[0], 'title', confidence=0.5)
        return

    # rule 4: narrow the search to the explicit group, with lower confidence
    grouped = usable_as_title(leaves_after_in_explicitgroup())
    if grouped:
        if len(grouped) == 1:
            found_property(grouped[0], 'title', confidence=0.4)
        else:
            found_property(grouped[0], 'title', confidence=0.3)
        return

    # rule 5: fall back to the longest candidate; max() keeps the first one
    # when several share the maximum length
    following = usable_as_title(leaves_after_in_pathgroup())
    if following:
        longest = max(following, key=lambda leaf: len(leaf.clean_value))
        found_property(longest, 'title', confidence=0.3)


def process(mtree):
    """Guess series and episode title of `mtree` from positional heuristics.

    Steps, all applied in sequence:

    * If some leaf has an 'episodeNumber' guess, delegate to
      `match_from_epnum_position` using the first such leaf. Otherwise look
      at the unidentified leaves of `mtree.node_at((-2,))` (the basename)
      whose text is not in `non_episode_title`: the first one becomes the
      series and, when present, the second one the title (both 0.4).
    * If `mtree.node_at((-3,))` (the folder containing the file) exists and
      has exactly one unidentified leaf, that leaf becomes the series (0.3).
    * If some node has a 'season' guess but no 'episodeNumber', and exactly
      one unidentified leaf lives in the path group right before the first
      such node, that leaf becomes the series (0.5).
    * Finally, the 'series' confidence is halved on every node whose series
      value (lowercased) is listed in `unlikely_series`.

    Guesses are written onto the tree nodes; nothing is returned.
    """
    epnum_leaves = [leaf for leaf in mtree.leaves()
                    if 'episodeNumber' in leaf.guess]

    if epnum_leaves:
        match_from_epnum_position(mtree, epnum_leaves[0])
    else:
        # no episode number: with at least 2 groups in the basename it is
        # probably "series - eptitle"; a lone group is probably the series
        basename = mtree.node_at((-2,))
        candidates = [leaf for leaf in basename.unidentified_leaves()
                      if leaf.clean_value.lower() not in non_episode_title]

        if candidates:
            found_property(candidates[0], 'series', 0.4)
            if len(candidates) >= 2:
                found_property(candidates[1], 'title', 0.4)

    # a single remaining valid group in the folder containing the file is
    # likely to be the series name; the folder may not exist at all
    try:
        folder_leaves = mtree.node_at((-3,)).unidentified_leaves()
    except ValueError:
        folder_leaves = []

    if len(folder_leaves) == 1:
        found_property(folder_leaves[0], 'series', 0.3)

    # a path group holding only the season info is most likely preceded by
    # the series title (ie: ../series/season X/..)
    season_only = [n for n in mtree.nodes()
                   if 'season' in n.guess and 'episodeNumber' not in n.guess]

    if season_only:
        season_node = season_only[0]
        preceding = [leaf for leaf in mtree.unidentified_leaves()
                     if leaf.node_idx[0] == season_node.node_idx[0] - 1]
        if len(preceding) == 1:
            found_property(preceding[0], 'series', 0.5)

    # reduce the confidence of unlikely series
    for n in mtree.nodes():
        guess = n.guess
        if 'series' in guess and guess['series'].lower() in unlikely_series:
            halved = guess.confidence('series') * 0.5
            guess.set_confidence('series', halved)
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search and gather its series data, allowing you to now have access to and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. Also, this edition is based off the code we used in our XEM edition, so it does come with scene numbering support as well as all the other features our XEM edition has to offer. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`#!/usr/bin/env python2`
			`# -- coding: utf-8 --`
			`#`
			`# GuessIt - A library for guessing information from filenames`
			`# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>`
			`#`
			`# GuessIt is free software; you can redistribute it and/or modify it under`
			`# the terms of the Lesser GNU General Public License as published by`
			`# the Free Software Foundation; either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# GuessIt is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# Lesser GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the Lesser GNU General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`
			`#`

			`from __future__ import unicode_literals`
			`from guessit.transfo import found_property`
			`from guessit.patterns import non_episode_title, unlikely_series`
			`import logging`

			`log = logging.getLogger(__name__)`


			`def match_from_epnum_position(mtree, node):`
			`epnum_idx = node.node_idx`

			`# a few helper functions to be able to filter using high-level semantics`
			`def before_epnum_in_same_pathgroup():`
			`return [ leaf for leaf in mtree.unidentified_leaves()`
			`if (leaf.node_idx[0] == epnum_idx[0] and`
			`leaf.node_idx[1:] < epnum_idx[1:]) ]`

			`def after_epnum_in_same_pathgroup():`
			`return [ leaf for leaf in mtree.unidentified_leaves()`
			`if (leaf.node_idx[0] == epnum_idx[0] and`
			`leaf.node_idx[1:] > epnum_idx[1:]) ]`

			`def after_epnum_in_same_explicitgroup():`
			`return [ leaf for leaf in mtree.unidentified_leaves()`
			`if (leaf.node_idx[:2] == epnum_idx[:2] and`
			`leaf.node_idx[2:] > epnum_idx[2:]) ]`

			`# epnumber is the first group and there are only 2 after it in same`
			`# path group`
			`# -> series title - episode title`
			`title_candidates = [ n for n in after_epnum_in_same_pathgroup()`
			`if n.clean_value.lower() not in non_episode_title ]`
			`if ('title' not in mtree.info and # no title`
			`before_epnum_in_same_pathgroup() == [] and # no groups before`
			`len(title_candidates) == 2): # only 2 groups after`

			`found_property(title_candidates[0], 'series', confidence=0.4)`
			`found_property(title_candidates[1], 'title', confidence=0.4)`
			`return`

			`# if we have at least 1 valid group before the episodeNumber, then it's`
			`# probably the series name`
			`series_candidates = before_epnum_in_same_pathgroup()`
			`if len(series_candidates) >= 1:`
			`found_property(series_candidates[0], 'series', confidence=0.7)`

			`# only 1 group after (in the same path group) and it's probably the`
			`# episode title`
			`title_candidates = [ n for n in after_epnum_in_same_pathgroup()`
			`if n.clean_value.lower() not in non_episode_title ]`

			`if len(title_candidates) == 1:`
			`found_property(title_candidates[0], 'title', confidence=0.5)`
			`return`
			`else:`
			`# try in the same explicit group, with lower confidence`
			`title_candidates = [ n for n in after_epnum_in_same_explicitgroup()`
			`if n.clean_value.lower() not in non_episode_title`
			`]`
			`if len(title_candidates) == 1:`
			`found_property(title_candidates[0], 'title', confidence=0.4)`
			`return`
			`elif len(title_candidates) > 1:`
			`found_property(title_candidates[0], 'title', confidence=0.3)`
			`return`

			`# get the one with the longest value`
			`title_candidates = [ n for n in after_epnum_in_same_pathgroup()`
			`if n.clean_value.lower() not in non_episode_title ]`
			`if title_candidates:`
			`maxidx = -1`
			`maxv = -1`
			`for i, c in enumerate(title_candidates):`
			`if len(c.clean_value) > maxv:`
			`maxidx = i`
			`maxv = len(c.clean_value)`
			`found_property(title_candidates[maxidx], 'title', confidence=0.3)`


			`def process(mtree):`
			`eps = [node for node in mtree.leaves() if 'episodeNumber' in node.guess]`
			`if eps:`
			`match_from_epnum_position(mtree, eps[0])`

			`else:`
			`# if we don't have the episode number, but at least 2 groups in the`
			`# basename, then it's probably series - eptitle`
			`basename = mtree.node_at((-2,))`
			`title_candidates = [ n for n in basename.unidentified_leaves()`
			`if n.clean_value.lower() not in non_episode_title`
			`]`

			`if len(title_candidates) >= 2:`
			`found_property(title_candidates[0], 'series', 0.4)`
			`found_property(title_candidates[1], 'title', 0.4)`
			`elif len(title_candidates) == 1:`
			`# but if there's only one candidate, it's probably the series name`
			`found_property(title_candidates[0], 'series', 0.4)`

			`# if we only have 1 remaining valid group in the folder containing the`
			`# file, then it's likely that it is the series name`
			`try:`
			`series_candidates = mtree.node_at((-3,)).unidentified_leaves()`
			`except ValueError:`
			`series_candidates = []`

			`if len(series_candidates) == 1:`
			`found_property(series_candidates[0], 'series', 0.3)`

			`# if there's a path group that only contains the season info, then the`
			`# previous one is most likely the series title (ie: ../series/season X/..)`
			`eps = [ node for node in mtree.nodes()`
			`if 'season' in node.guess and 'episodeNumber' not in node.guess ]`

			`if eps:`
			`previous = [ node for node in mtree.unidentified_leaves()`
			`if node.node_idx[0] == eps[0].node_idx[0] - 1 ]`
			`if len(previous) == 1:`
			`found_property(previous[0], 'series', 0.5)`

			`# reduce the confidence of unlikely series`
			`for node in mtree.nodes():`
			`if 'series' in node.guess:`
			`if node.guess['series'].lower() in unlikely_series:`
			`new_confidence = node.guess.confidence('series') * 0.5`
			`node.guess.set_confidence('series', new_confidence)`