SickRage/lib/guessit/transfo/guess_movie_title_from_position.py

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

from __future__ import unicode_literals
from guessit import Guess
import unicodedata
import logging

log = logging.getLogger(__name__)


def process(mtree):
    def found_property(node, name, value, confidence):
        node.guess = Guess({ name: value },
                           confidence=confidence)
        log.debug('Found with confidence %.2f: %s' % (confidence, node.guess))

    def found_title(node, confidence):
        found_property(node, 'title', node.clean_value, confidence)

    basename = mtree.node_at((-2,))
    all_valid = lambda leaf: len(leaf.clean_value) > 0
    basename_leftover = basename.unidentified_leaves(valid=all_valid)

    try:
        folder = mtree.node_at((-3,))
        folder_leftover = folder.unidentified_leaves()
    except ValueError:
        folder = None
        folder_leftover = []

    log.debug('folder: %s' % folder_leftover)
    log.debug('basename: %s' % basename_leftover)

    # specific cases:
    # if we find the same group both in the folder name and the filename,
    # it's a good candidate for title
    if (folder_leftover and basename_leftover and
        folder_leftover[0].clean_value == basename_leftover[0].clean_value):

        found_title(folder_leftover[0], confidence=0.8)
        return

    # specific cases:
    # if the basename contains a number first followed by an unidentified
    # group, and the folder only contains 1 unidentified one, then we have
    # a series
    # ex: Millenium Trilogy (2009)/(1)The Girl With The Dragon Tattoo(2009).mkv
    try:
        series = folder_leftover[0]
        filmNumber = basename_leftover[0]
        title = basename_leftover[1]

        basename_leaves = basename.leaves()

        num = int(filmNumber.clean_value)

        log.debug('series: %s' % series.clean_value)
        log.debug('title: %s' % title.clean_value)
        if (series.clean_value != title.clean_value and
            series.clean_value != filmNumber.clean_value and
            basename_leaves.index(filmNumber) == 0 and
            basename_leaves.index(title) == 1):

            found_title(title, confidence=0.6)
            found_property(series, 'filmSeries',
                           series.clean_value, confidence=0.6)
            found_property(filmNumber, 'filmNumber',
                           num, confidence=0.6)
        return
    except Exception:
        pass

    # specific cases:
    #  - movies/tttttt (yyyy)/tttttt.ccc
    try:
        if mtree.node_at((-4, 0)).value.lower() == 'movies':
            folder = mtree.node_at((-3,))

            # Note:too generic, might solve all the unittests as they all
            # contain 'movies' in their path
            #
            #if containing_folder.is_leaf() and not containing_folder.guess:
            #    containing_folder.guess =
            #        Guess({ 'title': clean_string(containing_folder.value) },
            #              confidence=0.7)

            year_group = folder.first_leaf_containing('year')
            groups_before = folder.previous_unidentified_leaves(year_group)

            found_title(groups_before[0], confidence=0.8)
            return

    except Exception:
        pass

    # if we have either format or videoCodec in the folder containing the file
    # or one of its parents, then we should probably look for the title in
    # there rather than in the basename
    try:
        props = mtree.previous_leaves_containing(mtree.children[-2],
                                                 [ 'videoCodec', 'format',
                                                   'language' ])
    except IndexError:
        props = []

    if props:
        group_idx = props[0].node_idx[0]
        if all(g.node_idx[0] == group_idx for g in props):
            # if they're all in the same group, take leftover info from there
            leftover = mtree.node_at((group_idx,)).unidentified_leaves()

            if leftover:
                found_title(leftover[0], confidence=0.7)
                return

    # look for title in basename if there are some remaining undidentified
    # groups there
    if basename_leftover:
        title_candidate = basename_leftover[0]

        # if basename is only one word and the containing folder has at least
        # 3 words in it, we should take the title from the folder name
        # ex: Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi
        # ex: Movies/Somewhere.2010.DVDRip.XviD-iLG/i-smwhr.avi  <-- TODO: gets caught here?
        if (title_candidate.clean_value.count(' ') == 0 and
            folder_leftover and
            folder_leftover[0].clean_value.count(' ') >= 2):

            found_title(folder_leftover[0], confidence=0.7)
            return

        # if there are only 2 unidentified groups, the first of which is inside
        # brackets or parentheses, we take the second one for the title:
        # ex: Movies/[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi
        if len(basename_leftover) == 2 and basename_leftover[0].is_explicit():
            found_title(basename_leftover[1], confidence=0.8)
            return

        # if all else fails, take the first remaining unidentified group in the
        # basename as title
        found_title(title_candidate, confidence=0.6)
        return

    # if there are no leftover groups in the basename, look in the folder name
    if folder_leftover:
        found_title(folder_leftover[0], confidence=0.5)
        return

    # if nothing worked, look if we have a very small group at the beginning
    # of the basename
    basename = mtree.node_at((-2,))
    basename_leftover = basename.unidentified_leaves(valid=lambda leaf: True)
    if basename_leftover:
        found_title(basename_leftover[0], confidence=0.4)
        return