# 2014-03-10 01:18:05 -04:00
# Author: Nic Wolfe <nic@wolfeden.ca>
# URL: http://code.google.com/p/sickbeard/
#
# This file is part of Sick Beard.
#
# Sick Beard is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Sick Beard is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sick Beard. If not, see <http://www.gnu.org/licenses/>.
import fnmatch
import os
import re
import datetime
import sickbeard
from sickbeard . common import countryList
from sickbeard . helpers import sanitizeSceneName
from sickbeard . scene_exceptions import get_scene_exceptions
from sickbeard import logger
from sickbeard import db
from sickbeard import encodingKludge as ek
from name_parser . parser import NameParser , InvalidNameException
from lib . unidecode import unidecode
# Regex fragments that mark a release as unwanted (subtitle packs, fixes,
# samples, extras, dubbed releases). Each entry is matched case-insensitively
# as a whole "word" delimited by non-word characters or underscores.
# Entries must not carry surrounding whitespace: they are spliced directly
# into a larger regular expression and echoed in log messages.
resultFilters = [
    "sub(pack|s|bed)",
    "swesub(bed)?",
    "(dir|sample|sub|nfo)fix",
    "sample",
    "(dvd)?extras",
    "dub(bed)?",
]
def filterBadReleases(name):
    """
    Filters out non-english and just all-around stupid releases by comparing them
    to the resultFilters contents and the user-configured ignore words.

    name: the release name to check

    Returns: True if the release name is OK, False if it's bad (either it
    cannot be parsed as an episode name or it contains a filtered word).
    """
    # The parse result itself is not needed; parsing only verifies that the
    # name can be understood as an episode at all.
    try:
        NameParser().parse(name)
    except InvalidNameException:
        logger.log(u"Unable to parse the filename " + name + " into a valid episode", logger.WARNING)
        return False

    # if any of the bad strings are in the name then say no
    ignore_words = (sickbeard.IGNORE_WORDS or '').split(',')
    for raw_word in resultFilters + ignore_words:
        word = raw_word.strip()
        if not word:
            # An empty entry (empty setting, trailing comma) would collapse the
            # pattern to (^|[\W_])($|[\W_]) and reject names that merely end in
            # punctuation, so it is skipped.
            continue
        if re.search(r'(^|[\W_])' + word + r'($|[\W_])', name, re.I):
            logger.log(u"Invalid scene release: " + name + " contains " + word + ", ignoring it", logger.DEBUG)
            return False

    return True
def sceneToNormalShowNames(name):
    """
    Takes a show name from a scene dirname and converts it to a more "human-readable" format.

    name: The show name to convert

    Returns: a list (unordered, without duplicates) of all the possible
    "normal" names; an empty list when name is empty or None.
    """
    if not name:
        return []

    name_list = [name]

    # use both and and &
    # The pattern carries its own (?i) flag. re.sub's fourth positional
    # parameter is `count`, so passing re.I there would silently cap the number
    # of substitutions instead of setting a flag.
    new_name = re.sub(r'(?i)([\. ])and([\. ])', r'\1&\2', name)
    if new_name not in name_list:
        name_list.append(new_name)

    # Loop-invariant: alternation of every value currently in countryList.
    country_match_str = '|'.join(countryList.values())
    country_regex = r'(?i)([. _-])(' + country_match_str + r')$'

    results = []
    for cur_name in name_list:
        # add brackets around the year
        results.append(re.sub(r'(\D)(\d{4})$', r'\1(\2)', cur_name))

        # add brackets around the country
        results.append(re.sub(country_regex, r'\1(\2)', cur_name))

    results += name_list

    return list(set(results))
def makeSceneShowSearchStrings(show):
    """
    Build the scene-style search names for a show.

    show: the show object handed to allPossibleShowNames

    Returns: a list holding sanitizeSceneName() applied to every name that
    allPossibleShowNames(show) yields, in the same order.
    """
    # scenify the names
    return [sanitizeSceneName(candidate) for candidate in allPossibleShowNames(show)]
def makeSceneSeasonSearchString(show, segment, extraSearchType=None):
    """
    Build the search strings used to look for a whole season of a show.

    show: show object; air_by_date and indexerid are read from it
    segment: the season number, or for air-by-date shows the string that is
             appended to the show name as-is
    extraSearchType: when set to anything truthy no strings are generated
                     (no provider-specific format is implemented here)

    Returns: a list of search strings. For a show with exactly one regular
    season the bare show names are returned, otherwise "<show name>.<season>".
    """
    if show.air_by_date:
        numseasons = 0

        # the search string for air by date shows is just the segment itself
        seasonStrings = [segment]
    else:
        # Only regular (non air-by-date) shows need the season count, so the
        # database is only touched on this branch.
        myDB = db.DBConnection()
        numseasonsSQlResult = myDB.select("SELECT COUNT(DISTINCT season) as numseasons FROM tv_episodes WHERE showid = ? and season != 0", [show.indexerid])
        numseasons = int(numseasonsSQlResult[0][0])

        seasonStrings = ["S%02d" % segment]

    toReturn = []

    # most providers all work the same way; anything else yields no strings
    if extraSearchType:
        return toReturn

    # search each show name
    for curShow in set(makeSceneShowSearchStrings(show)):
        if numseasons == 1:
            # if there's only one season then we can just use the show name straight up
            toReturn.append(curShow)
        else:
            # for providers that don't allow multiple searches in one request we only search for Sxx style stuff
            toReturn.extend(curShow + "." + cur_season for cur_season in seasonStrings)

    return toReturn
def makeSceneSearchString(episode):
    """
    Build the search strings used to look for a single episode.

    episode: episode object; season, episode, airdate and show (with
             air_by_date and indexerid) are read from it

    Returns: a list with one "<show name>.<episode string>" entry per
    combination of scene show name and episode string. The episode string is
    the air date for air-by-date shows, otherwise both the SxxEyy and the
    NxYY form. For short single-season shows the episode string is empty, so
    the entries are the show name followed by a dot.
    """
    myDB = db.DBConnection()
    numseasonsSQlResult = myDB.select("SELECT COUNT(DISTINCT season) as numseasons FROM tv_episodes WHERE showid = ? and season != 0", [episode.show.indexerid])
    numseasons = int(numseasonsSQlResult[0][0])

    # for single-season shows just search for the show name -- if total ep count (exclude s0) is less than 11
    # due to the amount of qualities and releases, it is easy to go over the 50 result limit on rss feeds otherwise
    # The episode count only matters for single-season shows, so it is only
    # queried in that case.
    name_only = False
    if numseasons == 1:
        numepisodesSQlResult = myDB.select("SELECT COUNT(episode) as numepisodes FROM tv_episodes WHERE showid = ? and season != 0", [episode.show.indexerid])
        name_only = int(numepisodesSQlResult[0][0]) < 11

    if name_only:
        epStrings = ['']
    elif episode.show.air_by_date and episode.airdate != datetime.date.fromordinal(1):
        # see if we should use dates instead of episodes
        # (an airdate equal to ordinal 1 is not used as a search date)
        epStrings = [str(episode.airdate)]
    else:
        season = int(episode.season)
        number = int(episode.episode)
        epStrings = ["S%02iE%02i" % (season, number),
                     "%ix%02i" % (season, number)]

    toReturn = []
    for curShow in set(makeSceneShowSearchStrings(episode.show)):
        for curEpString in epStrings:
            toReturn.append(curShow + '.' + curEpString)

    return toReturn
def isGoodResult(name, show, log=True):
    """
    Use an automatically-created regex to make sure the result actually is the show it claims to be

    name: the release name a provider returned
    show: show object; its possible names, startyear and name are used
    log: when False, no log messages are written by this function

    Returns: True as soon as one of the show's name variants matches, False otherwise.
    """
    all_show_names = allPossibleShowNames(show)
    showNames = [sanitizeSceneName(x) for x in all_show_names] + all_show_names
    showNames += [unidecode(x) for x in all_show_names]

    # Everything that may follow the show name: SxxEyy, NxYY, an air date,
    # part/pt numbering, "Season N" or "ENN".
    # NOTE(review): the part after the top-level '|' is a separate alternative
    # that is not anchored to the show name -- confirm this is intended.
    episode_regex = r'\W+(?:(?:S\d[\dE._ -])|(?:\d\d?x)|(?:\d{4}\W\d\d\W\d\d)|(?:(?:part|pt)[\._ -]?(\d|[ivx]))|Season\W+\d+\W+|E\d+\W+)|((.+?)[. _-]+)(\d{1,2}[a-zA-Z]{2})[. _-]+([a-zA-Z]{3,4})[. _-]+(\d{4})*(.+?)([. _-])()((([^- ]+))?)?$'

    for curName in set(showNames):
        # Any escaped whitespace, dot or hyphen in the name may be any run of
        # non-word characters in the release name. The doubled backslash in the
        # replacement template emits a literal "\W+".
        escaped_name = re.sub(r'\\[\s.-]', r'\\W+', re.escape(curName))
        if show.startyear:
            # the start year may optionally follow the show name
            escaped_name += r"(?:\W+" + str(show.startyear) + r")?"
        curRegex = '^' + escaped_name + episode_regex

        if log:
            logger.log(u"Checking if show " + name + " matches " + curRegex, logger.DEBUG)

        if re.search(curRegex, name, re.I):
            # honour log=False here as well, like every other message
            if log:
                logger.log(u"Matched " + curRegex + " to " + name, logger.DEBUG)
            return True

    if log:
        logger.log(u"Provider gave result " + name + " but that doesn't seem like a valid result for " + show.name + " so I'm ignoring it")
    return False
def allPossibleShowNames(show):
    """
    Figures out every possible variation of the name for a particular show. Includes TVDB name, TVRage name,
    country codes on the end, eg. "Show Name (AU)", and any scene exception names.

    show: a TVShow object that we should get the names of

    Returns: a list of all the possible show names (may contain duplicates)
    """
    showNames = [show.name]
    showNames += [name for name in get_scene_exceptions(show.indexerid)]

    # NOTE(review): country_list is the shared countryList object, so the
    # update below adds the reversed value -> key entries to the module-level
    # dict in place. sceneToNormalShowNames reads countryList.values(), so this
    # is kept as-is; confirm before switching to a private copy.
    country_list = countryList
    country_list.update(dict(zip(countryList.values(), countryList.keys())))

    # if we have "Show Name Australia" or "Show Name (Australia)" this will add "Show Name (AU)" for
    # any countries defined in common.countryList
    # (and vice versa)
    newShowNames = []
    for curName in set(showNames):
        if not curName:
            continue
        for curCountry in country_list:
            replacement = ' (' + country_list[curCountry] + ')'
            # Only the trailing country token is rewritten; slicing the suffix
            # off avoids touching the same text earlier in the name.
            for suffix in (' ' + curCountry, ' (' + curCountry + ')'):
                if curName.endswith(suffix):
                    newShowNames.append(curName[:-len(suffix)] + replacement)
                    break

    showNames += newShowNames

    return showNames
def determineReleaseName(dir_name=None, nzb_name=None):
    """
    Determine a release name from an nzb and/or folder name

    dir_name: folder that is searched for a single *.nzb or *.nfo file and
              whose own name is used as a fallback
    nzb_name: name of the nzb; when given it wins and is returned with
              everything from its last dot onwards removed

    Returns: the release name, or None if none could be determined
    """
    if nzb_name is not None:
        logger.log(u"Using nzb_name for release name.")
        return nzb_name.rpartition('.')[0]

    if dir_name is None:
        return None

    # try to get the release name from nzb/nfo
    # The directory listing does not depend on the pattern, so read it once.
    files = [file_name for file_name in ek.ek(os.listdir, dir_name)
             if ek.ek(os.path.isfile, ek.ek(os.path.join, dir_name, file_name))]

    for search in ["*.nzb", "*.nfo"]:
        reg_expr = re.compile(fnmatch.translate(search), re.IGNORECASE)
        results = [file_name for file_name in files if reg_expr.search(file_name)]

        # only trust the file name when exactly one candidate exists
        if len(results) == 1:
            found_file = ek.ek(os.path.basename, results[0])
            # drop the .nzb/.nfo extension
            found_file = found_file.rpartition('.')[0]
            if filterBadReleases(found_file):
                logger.log(u"Release name (" + found_file + ") found from file (" + results[0] + ")")
                # Return exactly the name that was validated and logged;
                # stripping at the last dot again would cut off the final
                # segment of the release name.
                return found_file

    # If that fails, we try the folder
    folder = ek.ek(os.path.basename, dir_name)
    if filterBadReleases(folder):
        # NOTE: Multiple failed downloads will change the folder name.
        # (e.g., appending #s)
        # Should we handle that?
        logger.log(u"Folder name (" + folder + ") appears to be a valid release name. Using it.")
        return folder

    return None