From 284d53bee5f23bbd5b12530f8230b7f556f400b7 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Mon, 6 Jan 2014 23:22:31 +0000 Subject: [PATCH] + make movie matching properly aware of alias names --- BuildData.groovy | 32 +++++++++++++------ .../filebot/cli/ScriptShell.lib.groovy | 2 +- .../filebot/media/MediaDetection.java | 4 ++- website/data/query-blacklist.txt | 3 ++ 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/BuildData.groovy b/BuildData.groovy index 2d56a637..e70e68f2 100644 --- a/BuildData.groovy +++ b/BuildData.groovy @@ -62,6 +62,20 @@ def isValidMovieName(s) { return s=~ /^[A-Z0-9]/ && s =~ /[\p{Alpha}]{3}/ } +def getNamePermutations(names) { + def fn1 = { s -> s.replaceAll(/^(?i)(The|A)\s/, '') } + def fn2 = { s -> s.replaceAll(/\s&\s/, ' and ') } + def fn3 = { s -> s.replaceAll(/\([^\)]*\)$/, '') } + + def out = new LinkedHashSet() + out += names*.trim() + [fn1, fn2, fn3].each{ fn -> + def results = out.findResults{ fn(it) } + out += results*.trim(); + } + return out.unique{ it.toLowerCase().normalizePunctuation() }.findAll{ it.length() > 0 } +} + def treeSort(list, keyFunction) { def sorter = new TreeMap(String.CASE_INSENSITIVE_ORDER) list.each{ @@ -115,7 +129,7 @@ tmdb*.join('\t').join('\n').saveAs(tmdb_txt) movies = tmdb.findResults{ def ity = it[1..3] // imdb id, tmdb id, year - def names = it[4..-2].findAll{ isValidMovieName(it) }.unique{ it.toLowerCase().normalizePunctuation() } + def names = getNamePermutations(it[4..-1]).findAll{ isValidMovieName(it) } if (ity[0].toInteger() > 0 && ity[1].toInteger() > 0 && names.size() > 0) return ity + names else @@ -181,24 +195,24 @@ tvdb.values().findResults{ it.join('\t') }.join('\n').saveAs(tvdb_txt) def thetvdb_index = [] -tvdb.values().each{ - def n1 = it[2].trim() - def n2 = it[3].replaceAll(/^(?i)(The|A)\s/, '').replaceAll(/\s&\s/, ' and ').replaceAll(/\([^\)]*\)$/, '').trim() +tvdb.values().each{ r -> + def tvdb_name = r[2] + def imdb_name = r[3].replaceAll(/\([^\)]*\)$/, '').trim() - thetvdb_index << [it[0], n1] - if (similarity(n1,n2) < 1) { - thetvdb_index << [it[0], n2] + getNamePermutations([tvdb_name, imdb_name]).each{ n -> + thetvdb_index << [r[0], n] } } def addSeriesAlias = { from, to -> def se = thetvdb_index.find{ from == it[1] && !it.contains(to) } if (se == null) throw new Exception("Unabled to find series '${from}'") - thetvdb_index << [se[0], to] + getNamePermutations([to]).each{ n -> + thetvdb_index << [se[0], n] + } } // additional custom mappings -addSeriesAlias('Law & Order: Special Victims Unit', 'Law and Order SVU') addSeriesAlias('Law & Order: Special Victims Unit', 'Law & Order SVU') addSeriesAlias('CSI: Crime Scene Investigation', 'CSI') addSeriesAlias('M*A*S*H', 'MASH') diff --git a/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy b/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy index 4c5985ea..b04d8a93 100644 --- a/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy +++ b/source/net/sourceforge/filebot/cli/ScriptShell.lib.groovy @@ -235,7 +235,7 @@ def detectSeriesName(files, locale = Locale.ENGLISH) { } def detectMovie(File file, strict = true, queryLookupService = TheMovieDB, hashLookupService = OpenSubtitles, locale = Locale.ENGLISH) { - def movies = MediaDetection.matchMovieName([file.name, file.parentFile.name], true, 0) ?: MediaDetection.detectMovie(file, hashLookupService, queryLookupService, locale, strict) + def movies = MediaDetection.matchMovieName(file.listPath(3, true)*.name, true, 0) ?: MediaDetection.detectMovie(file, hashLookupService, queryLookupService, locale, strict) return movies == null || movies.isEmpty() ? null : movies.toList()[0] } diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java index 1de00b8f..74f0a576 100644 --- a/source/net/sourceforge/filebot/media/MediaDetection.java +++ b/source/net/sourceforge/filebot/media/MediaDetection.java @@ -752,7 +752,9 @@ public class MediaDetection { if (movieIndex.isEmpty()) { try { for (Movie movie : releaseInfo.getMovieList()) { - movieIndex.add(new SimpleEntry(normalizePunctuation(movie.getName()).toLowerCase(), movie)); + for (String name : movie.getEffectiveNames()) { + movieIndex.add(new SimpleEntry(normalizePunctuation(name).toLowerCase(), movie)); + } } } catch (Exception e) { // can't load movie index, just try again next time diff --git a/website/data/query-blacklist.txt b/website/data/query-blacklist.txt index 56f43619..bb716fa5 100644 --- a/website/data/query-blacklist.txt +++ b/website/data/query-blacklist.txt @@ -137,6 +137,7 @@ AMC-BL AMC-TEST Anaglyph Arte +Best.of.\d{4} Best.Quality bt.downloads btarena.org @@ -209,6 +210,7 @@ info iNT iNTERNAL iPod +IPT ISO iTA iTALIA @@ -225,6 +227,7 @@ lol Los.Sustitutos M.HD mkvonly +Movie.Pack mpg$ MultiSub MVGroup.org