1
0
mirror of https://github.com/moparisthebest/SickRage synced 2024-11-11 03:45:01 -05:00
SickRage/lib/guessit/matchtree.py
echel0n 0d9fbc1ad7 Welcome to our SickBeard-TVRage Edition ...
This version of SickBeard uses both TVDB and TVRage to search and gather its series data, allowing you to access and download shows that you couldn't before because of being locked into only what TheTVDB had to offer.

Also, this edition is based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer.

Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk!

Enjoy!
2014-03-09 22:39:12 -07:00

288 lines
8.9 KiB
Python

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import UnicodeMixin, base_text_type, Guess
from guessit.textutils import clean_string, str_fill
from guessit.patterns import group_delimiters
from guessit.guess import (merge_similar_guesses, merge_all,
choose_int, choose_string)
import copy
import logging
log = logging.getLogger(__name__)
class BaseMatchTree(UnicodeMixin):
    """A MatchTree represents the hierarchical split of a string into its
    constituent semantic groups.

    Every node keeps a reference to the same full ``string`` and the
    ``span`` (start, end) of it that the node covers.  Children are created
    through :meth:`add_child` and carry their own :class:`Guess`.
    """

    def __init__(self, string='', span=None, parent=None):
        """Create a node covering ``span`` of ``string``.

        A falsy ``span`` (the default ``None``) means the whole string.
        ``parent`` is the enclosing node, or ``None`` for the root.
        """
        self.string = string
        self.span = span or (0, len(string))
        self.parent = parent
        self.children = []
        self.guess = Guess()

    @property
    def value(self):
        """The slice of the full string covered by this node."""
        return self.string[self.span[0]:self.span[1]]

    @property
    def clean_value(self):
        """``value`` passed through ``clean_string``."""
        return clean_string(self.value)

    @property
    def offset(self):
        """Start position of this node inside the full string."""
        return self.span[0]

    @property
    def info(self):
        """A plain dict built from this node's guess, updated in order with
        the ``info`` of each child (later children overwrite earlier keys)."""
        result = dict(self.guess)
        for child in self.children:
            result.update(child.info)
        return result

    @property
    def root(self):
        """The topmost ancestor of this node (the node itself if it has no
        parent)."""
        # Walk up iteratively and test against None explicitly, which is the
        # same "no parent" convention node_idx uses.
        node = self
        while node.parent is not None:
            node = node.parent
        return node

    @property
    def depth(self):
        """Height of the subtree rooted here: 0 for a leaf, otherwise one
        more than the deepest child."""
        if self.is_leaf():
            return 0
        return 1 + max(child.depth for child in self.children)

    def is_leaf(self):
        """Return True when this node has no children."""
        return not self.children

    def add_child(self, span):
        """Append a new child node covering ``span`` of the full string.

        ``span`` is expressed in coordinates of the full string, not
        relative to this node.
        """
        child = MatchTree(self.string, span=span, parent=self)
        self.children.append(child)

    def partition(self, indices):
        """Split this node into consecutive children at the given positions.

        ``indices`` are positions relative to this node's value.  The
        boundaries 0 and ``len(self.value)`` are added when missing, so the
        children always cover the whole value.  An empty ``indices`` is
        accepted and treated as "no inner cut" (a single child spanning the
        whole value) instead of raising ``IndexError``.
        """
        # sorted() returns a new list, so the caller's sequence is untouched.
        indices = sorted(indices)
        if not indices or indices[0] != 0:
            indices.insert(0, 0)
        if indices[-1] != len(self.value):
            indices.append(len(self.value))

        for start, end in zip(indices[:-1], indices[1:]):
            self.add_child(span=(self.offset + start,
                                 self.offset + end))

    def split_on_components(self, components):
        """Add one child per component, locating each one in this node's
        value with ``find`` starting after the previous component.

        NOTE(review): a component that is not present makes ``find`` return
        -1 and yields a meaningless span; callers are expected to pass
        components that really occur, in order, in ``self.value``.
        """
        offset = 0
        value = self.value
        for component in components:
            start = value.find(component, offset)
            end = start + len(component)
            self.add_child(span=(self.offset + start,
                                 self.offset + end))
            offset = end

    def nodes_at_depth(self, depth):
        """Yield the nodes located exactly ``depth`` levels below this one
        (``depth == 0`` yields this node itself)."""
        if depth <= 0:
            if depth == 0:
                yield self
            # Deeper levels can never match again, so there is no point in
            # walking the rest of the subtree.
            return
        for child in self.children:
            for node in child.nodes_at_depth(depth - 1):
                yield node

    @property
    def node_idx(self):
        """Tuple of child positions leading from the root to this node; the
        root itself has the empty tuple."""
        if self.parent is None:
            return ()
        return self.parent.node_idx + (self.parent.children.index(self),)

    def node_at(self, idx):
        """Return the descendant addressed by ``idx``, a sequence of child
        positions as produced by :attr:`node_idx`.

        A falsy ``idx`` returns this node.  Raises ``ValueError`` when the
        path does not lead to an existing node.
        """
        if not idx:
            return self

        node = self
        try:
            for position in idx:
                node = node.children[position]
        except (IndexError, TypeError):
            # Only translate lookup failures; a bare ``except`` would also
            # swallow KeyboardInterrupt/SystemExit.
            raise ValueError('Non-existent node index: %s' % (idx,))
        return node

    def nodes(self):
        """Yield this node and then all its descendants, depth-first in
        child order."""
        yield self
        for child in self.children:
            for node in child.nodes():
                yield node

    def _leaves(self):
        """Yield the leaves of this subtree in child order."""
        if self.is_leaf():
            yield self
        else:
            for child in self.children:
                # pylint: disable=W0212
                for leaf in child._leaves():
                    yield leaf

    def leaves(self):
        """Return the leaves of this subtree as a list."""
        return list(self._leaves())

    def to_string(self):
        """Return a multi-line textual picture of the tree.

        The result has one line per tree level (each node's span filled
        with its position among its siblings), then the string with guessed
        spans replaced by ``_``, then a line of one-letter codes telling
        which property was guessed, and finally the untouched string.
        """
        empty_line = ' ' * len(self.string)

        def to_hex(x):
            # Single-character label: digits up to 9, then 'A', 'B', ...
            if isinstance(x, int):
                return str(x) if x < 10 else chr(55 + x)
            return x

        # Ordered pairs rather than a dict so that the letter chosen for a
        # guess holding several properties does not depend on dict ordering.
        letters = (('episodeNumber', 'E'),
                   ('season', 'S'),
                   ('extension', 'e'),
                   ('format', 'f'),
                   ('language', 'l'),
                   ('country', 'C'),
                   ('videoCodec', 'v'),
                   ('audioCodec', 'a'),
                   ('website', 'w'),
                   ('container', 'c'),
                   ('series', 'T'),
                   ('title', 't'),
                   ('date', 'd'),
                   ('year', 'y'),
                   ('releaseGroup', 'r'),
                   ('screenSize', 's'))

        def meaning(result):
            if result is None:
                return ' '
            for prop, letter in letters:
                if prop in result:
                    return letter
            return 'x'

        lines = [empty_line] * (self.depth + 2)  # +2: remaining, meaning
        lines[-2] = self.string

        # node_idx is relative to the real root; subtract our own depth so
        # that the method also works when called on an inner node.
        base = len(self.node_idx)

        for node in self.nodes():
            if node is self:
                continue

            idx = node.node_idx
            level = len(idx) - base - 1
            lines[level] = str_fill(lines[level], node.span,
                                    to_hex(idx[-1]))

            if node.guess:
                lines[-2] = str_fill(lines[-2], node.span, '_')
                lines[-1] = str_fill(lines[-1], node.span,
                                     meaning(node.guess))

        lines.append(self.string)

        return '\n'.join(lines)

    def __unicode__(self):
        return self.to_string()
class MatchTree(BaseMatchTree):
    """The MatchTree contains a few "utility" methods which are not necessary
    for the BaseMatchTree, but add a lot of convenience for writing
    higher-level rules."""

    def _unidentified_leaves(self,
                             valid=lambda leaf: len(leaf.clean_value) >= 2):
        """Yield the leaves whose guess is empty and for which ``valid(leaf)``
        is true (by default: a clean value of at least 2 characters)."""
        for leaf in self._leaves():
            if not leaf.guess and valid(leaf):
                yield leaf

    def unidentified_leaves(self,
                            valid=lambda leaf: len(leaf.clean_value) >= 2):
        """List version of :meth:`_unidentified_leaves`."""
        return list(self._unidentified_leaves(valid))

    def _leaves_containing(self, property_name):
        """Yield the leaves whose guess contains ``property_name``.

        ``property_name`` is either a single name or an iterable of names;
        in the latter case a leaf is yielded (once) as soon as its guess
        contains any of them.
        """
        if isinstance(property_name, base_text_type):
            property_name = [property_name]

        for leaf in self._leaves():
            if any(prop in leaf.guess for prop in property_name):
                yield leaf

    def leaves_containing(self, property_name):
        """List version of :meth:`_leaves_containing`."""
        return list(self._leaves_containing(property_name))

    def first_leaf_containing(self, property_name):
        """Return the first leaf whose guess contains ``property_name``, or
        ``None`` when there is none."""
        try:
            return next(self._leaves_containing(property_name))
        except StopIteration:
            return None

    def _previous_unidentified_leaves(self, node):
        """Yield the unidentified leaves whose ``node_idx`` sorts before the
        one of ``node``."""
        node_idx = node.node_idx
        for leaf in self._unidentified_leaves():
            if leaf.node_idx < node_idx:
                yield leaf

    def previous_unidentified_leaves(self, node):
        """List version of :meth:`_previous_unidentified_leaves`."""
        return list(self._previous_unidentified_leaves(node))

    def _previous_leaves_containing(self, node, property_name):
        """Yield the leaves containing ``property_name`` whose ``node_idx``
        sorts before the one of ``node``."""
        node_idx = node.node_idx
        for leaf in self._leaves_containing(property_name):
            if leaf.node_idx < node_idx:
                yield leaf

    def previous_leaves_containing(self, node, property_name):
        """List version of :meth:`_previous_leaves_containing`."""
        return list(self._previous_leaves_containing(node, property_name))

    def is_explicit(self):
        """Return whether the group was explicitly enclosed by
        parentheses/square brackets/etc.

        A node with an empty value is never explicit.
        """
        value = self.value
        if not value:
            # Nothing to look at; indexing [0]/[-1] would raise IndexError.
            return False
        return (value[0] + value[-1]) in group_delimiters

    def matched(self):
        """Merge the guesses of every node in this tree into a single result
        and return it.  The tree itself is left unmodified."""
        # we need to make a copy here, as the merge functions work in place and
        # calling them on the match tree would modify it
        parts = copy.deepcopy([node.guess
                               for node in self.nodes() if node.guess])

        # 1- try to merge similar information together and give it a higher
        #    confidence
        for int_part in ('year', 'season', 'episodeNumber'):
            merge_similar_guesses(parts, int_part, choose_int)

        for string_part in ('title', 'series', 'container', 'format',
                            'releaseGroup', 'website', 'audioCodec',
                            'videoCodec', 'screenSize', 'episodeFormat',
                            'audioChannels', 'idNumber'):
            merge_similar_guesses(parts, string_part, choose_string)

        # 2- merge the rest, potentially discarding information not properly
        #    merged before
        result = merge_all(parts,
                           append=['language', 'subtitleLanguage', 'other'])

        # Let the logging framework do the formatting instead of
        # concatenating the message by hand.
        log.debug('Final result: %s', result.nice_string())
        return result