1
0
mirror of https://github.com/moparisthebest/SickRage synced 2025-01-05 10:58:01 -05:00
SickRage/lib/guessit/matchtree.py

288 lines
8.9 KiB
Python
Raw Normal View History

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit import UnicodeMixin, base_text_type, Guess
from guessit.textutils import clean_string, str_fill
from guessit.patterns import group_delimiters
from guessit.guess import (merge_similar_guesses, merge_all,
choose_int, choose_string)
import copy
import logging
log = logging.getLogger(__name__)
class BaseMatchTree(UnicodeMixin):
"""A MatchTree represents the hierarchical split of a string into its
constituent semantic groups."""
def __init__(self, string='', span=None, parent=None):
self.string = string
self.span = span or (0, len(string))
self.parent = parent
self.children = []
self.guess = Guess()
@property
def value(self):
return self.string[self.span[0]:self.span[1]]
@property
def clean_value(self):
return clean_string(self.value)
@property
def offset(self):
return self.span[0]
@property
def info(self):
result = dict(self.guess)
for c in self.children:
result.update(c.info)
return result
@property
def root(self):
if not self.parent:
return self
return self.parent.root
@property
def depth(self):
if self.is_leaf():
return 0
return 1 + max(c.depth for c in self.children)
def is_leaf(self):
return self.children == []
def add_child(self, span):
child = MatchTree(self.string, span=span, parent=self)
self.children.append(child)
def partition(self, indices):
indices = sorted(indices)
if indices[0] != 0:
indices.insert(0, 0)
if indices[-1] != len(self.value):
indices.append(len(self.value))
for start, end in zip(indices[:-1], indices[1:]):
self.add_child(span=(self.offset + start,
self.offset + end))
def split_on_components(self, components):
offset = 0
for c in components:
start = self.value.find(c, offset)
end = start + len(c)
self.add_child(span=(self.offset + start,
self.offset + end))
offset = end
def nodes_at_depth(self, depth):
if depth == 0:
yield self
for child in self.children:
for node in child.nodes_at_depth(depth - 1):
yield node
@property
def node_idx(self):
if self.parent is None:
return ()
return self.parent.node_idx + (self.parent.children.index(self),)
def node_at(self, idx):
if not idx:
return self
try:
return self.children[idx[0]].node_at(idx[1:])
except:
raise ValueError('Non-existent node index: %s' % (idx,))
def nodes(self):
yield self
for child in self.children:
for node in child.nodes():
yield node
def _leaves(self):
if self.is_leaf():
yield self
else:
for child in self.children:
# pylint: disable=W0212
for leaf in child._leaves():
yield leaf
def leaves(self):
return list(self._leaves())
def to_string(self):
empty_line = ' ' * len(self.string)
def to_hex(x):
if isinstance(x, int):
return str(x) if x < 10 else chr(55 + x)
return x
def meaning(result):
mmap = { 'episodeNumber': 'E',
'season': 'S',
'extension': 'e',
'format': 'f',
'language': 'l',
'country': 'C',
'videoCodec': 'v',
'audioCodec': 'a',
'website': 'w',
'container': 'c',
'series': 'T',
'title': 't',
'date': 'd',
'year': 'y',
'releaseGroup': 'r',
'screenSize': 's'
}
if result is None:
return ' '
for prop, l in mmap.items():
if prop in result:
return l
return 'x'
lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning
lines[-2] = self.string
for node in self.nodes():
if node == self:
continue
idx = node.node_idx
depth = len(idx) - 1
if idx:
lines[depth] = str_fill(lines[depth], node.span,
to_hex(idx[-1]))
if node.guess:
lines[-2] = str_fill(lines[-2], node.span, '_')
lines[-1] = str_fill(lines[-1], node.span, meaning(node.guess))
lines.append(self.string)
return '\n'.join(lines)
def __unicode__(self):
return self.to_string()
class MatchTree(BaseMatchTree):
"""The MatchTree contains a few "utility" methods which are not necessary
for the BaseMatchTree, but add a lot of convenience for writing
higher-level rules."""
def _unidentified_leaves(self,
valid=lambda leaf: len(leaf.clean_value) >= 2):
for leaf in self._leaves():
if not leaf.guess and valid(leaf):
yield leaf
def unidentified_leaves(self,
valid=lambda leaf: len(leaf.clean_value) >= 2):
return list(self._unidentified_leaves(valid))
def _leaves_containing(self, property_name):
if isinstance(property_name, base_text_type):
property_name = [ property_name ]
for leaf in self._leaves():
for prop in property_name:
if prop in leaf.guess:
yield leaf
break
def leaves_containing(self, property_name):
return list(self._leaves_containing(property_name))
def first_leaf_containing(self, property_name):
try:
return next(self._leaves_containing(property_name))
except StopIteration:
return None
def _previous_unidentified_leaves(self, node):
node_idx = node.node_idx
for leaf in self._unidentified_leaves():
if leaf.node_idx < node_idx:
yield leaf
def previous_unidentified_leaves(self, node):
return list(self._previous_unidentified_leaves(node))
def _previous_leaves_containing(self, node, property_name):
node_idx = node.node_idx
for leaf in self._leaves_containing(property_name):
if leaf.node_idx < node_idx:
yield leaf
def previous_leaves_containing(self, node, property_name):
return list(self._previous_leaves_containing(node, property_name))
def is_explicit(self):
"""Return whether the group was explicitly enclosed by
parentheses/square brackets/etc."""
return (self.value[0] + self.value[-1]) in group_delimiters
def matched(self):
# we need to make a copy here, as the merge functions work in place and
# calling them on the match tree would modify it
parts = [node.guess for node in self.nodes() if node.guess]
parts = copy.deepcopy(parts)
# 1- try to merge similar information together and give it a higher
# confidence
for int_part in ('year', 'season', 'episodeNumber'):
merge_similar_guesses(parts, int_part, choose_int)
for string_part in ('title', 'series', 'container', 'format',
'releaseGroup', 'website', 'audioCodec',
'videoCodec', 'screenSize', 'episodeFormat',
'audioChannels', 'idNumber'):
merge_similar_guesses(parts, string_part, choose_string)
# 2- merge the rest, potentially discarding information not properly
# merged before
result = merge_all(parts,
append=['language', 'subtitleLanguage', 'other'])
log.debug('Final result: ' + result.nice_string())
return result