2015-05-11 06:35:45 -04:00
|
|
|
|
#!/usr/bin/env filebot -script
|
|
|
|
|
|
2016-02-24 14:33:04 -05:00
|
|
|
|
import java.util.regex.*
|
2014-01-08 03:36:32 -05:00
|
|
|
|
import org.tukaani.xz.*
|
2013-11-20 05:07:25 -05:00
|
|
|
|
|
2016-02-24 14:27:31 -05:00
|
|
|
|
|
2016-11-19 12:12:09 -05:00
|
|
|
|
// ------------------------------ UPDATE LISTS ------------------------------ //
|
|
|
|
|
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
2018-03-12 05:32:05 -04:00
|
|
|
|
// working folders, taken from the script variables 'project', 'data' and 'release'
File dir_root = project as File
File dir_data = data as File
File dir_release = release as File
|
2018-03-12 05:15:21 -04:00
|
|
|
|
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
2016-02-24 14:27:31 -05:00
|
|
|
|
// fetch the shared regex collections, check that every entry compiles, and store them sorted
def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw.githubusercontent.com/filebot/data/master')

for (name in ['add-series-alias.txt', 'query-blacklist.txt', 'release-groups.txt', 'series-mappings.txt']) {
	def input = new URL(dir_data_master + '/' + name)
	def output = dir_release.resolve(name)

	log.finest "Fetch $input"

	// case-insensitive sorted set: drops duplicates that differ only by case
	def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER)
	for (line in input.getText('UTF-8').split(/\R/)) {
		def pattern = line.trim()
		if (pattern.length() > 0) {
			// Pattern.compile throws on an invalid regex, so a bad entry aborts the script here
			lines << Pattern.compile(pattern).pattern()
		}
	}

	for (line in lines) {
		log.finest "$line"
	}
	pack(output, lines)
}
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
|
|
|
|
|
2016-11-19 12:12:09 -05:00
|
|
|
|
// ------------------------------ FUNCTIONS ------------------------------ //
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
|
|
|
|
|
2018-03-12 05:56:19 -04:00
|
|
|
|
// index files written into the release folder
def (moviedb_out, thetvdb_out, anidb_out, osdb_out) = ['moviedb.txt', 'thetvdb.txt', 'anidb.txt', 'osdb.txt'].collect{ dir_release.resolve(it) }
|
2015-05-11 05:13:35 -04:00
|
|
|
|
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
2013-08-10 03:56:11 -04:00
|
|
|
|
/**
 * Write the given lines to the given file (UTF-8, one entry per line, '\n' terminated)
 * and keep an XZ-compressed copy (same folder, file name + '.xz') in sync with it.
 *
 * The compressed copy is rewritten when hash(file) differs before and after the plain
 * file has been written, or when the compressed copy does not exist yet. Otherwise a
 * "[NOT MODIFIED]" warning is logged. Finally the row / column counts are logged.
 *
 * @param file   the plain text output file
 * @param lines  collection of tab-separated text lines
 */
def pack(file, lines) {
	def previousHash = hash(file)

	// the plain and the compressed file receive exactly the same content
	def writeLines = { stream ->
		stream.withWriter('UTF-8'){ writer ->
			lines.each{ writer.append(it).append('\n') }
		}
	}

	file.withOutputStream{ out -> writeLines(out) }

	def xz = file.parentFile.resolve(file.name + '.xz')

	// also (re)create the archive if it is missing, even though the text file did not change
	if (hash(file) != previousHash || !xz.exists()) {
		xz.withOutputStream{ out ->
			writeLines(new XZOutputStream(out, new LZMA2Options(LZMA2Options.PRESET_DEFAULT)))
		}
	} else {
		log.warning "[NOT MODIFIED] $file [$previousHash]"
	}

	def rows = lines.size()
	def columns = lines.collect{ it.split(/\t/).length }.max()
	log.info "${file.canonicalFile} ($rows rows, $columns columns)"
}
|
|
|
|
|
|
2019-05-22 07:48:39 -04:00
|
|
|
|
/**
 * Compute a fingerprint of the file content that is used to detect changes.
 *
 * The fingerprint is the CRC-32 checksum of all bytes in the file, so that edits
 * which keep the file length unchanged are still detected.
 *
 * @param file  the file to fingerprint
 * @return the CRC-32 value as long, or 0 if the file does not exist (or is not a regular file)
 */
def hash(file) {
	if (!file.isFile()) {
		return 0L
	}

	// fully qualified so that no additional import is required
	def crc = new java.util.zip.CRC32()
	crc.update(file.bytes)
	return crc.value
}
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
2013-11-21 09:31:31 -05:00
|
|
|
|
/**
 * Decide whether a name is usable as movie name.
 *
 * A name is accepted if it has at least 4 characters after normalizePunctuation(),
 * or if it starts with an ASCII upper-case letter or digit and contains a run of
 * at least 3 alphanumeric characters.
 *
 * @param s  the candidate name
 * @return true if the name is accepted
 */
def isValidMovieName(s) {
	if (s.normalizePunctuation().length() >= 4) {
		return true
	}
	return (s =~ /^[A-Z0-9]/) && (s =~ /[\p{Alnum}]{3}/)
}
|
|
|
|
|
|
2014-01-06 18:22:31 -05:00
|
|
|
|
/**
 * Expand a list of names into the original names plus a simplified variant of each
 * name, without near-duplicates and without unusable names.
 *
 * @param names  list of names
 * @return list of accepted names, originals and simplified variants interleaved
 */
def getNamePermutations(names) {
	// comparison key: lower-case with normalized punctuation (memoized per name)
	def normalize = { s -> s.toLowerCase().normalizePunctuation() }.memoize()

	def candidates = []
	for (original in names*.trim().unique()) {
		def simplified = original.trim()
		simplified = simplified.replaceAll(/([,]\s(The|A)$)/, '')       // trailing ", The" or ", A"
		simplified = simplified.replaceAll(/\s&\s/, ' and ')            // " & " => " and "
		simplified = simplified.replaceAll(/\s\([^\)]*\)$/, '').trim()  // trailing " (...)"
		simplified = simplified.replaceAll(/^(?i:The|A)\s/, '').trim()  // leading "The " or "A "

		candidates << original
		candidates << simplified
	}

	// keep a name only if it
	// - has at least 2 characters
	// - is not just a 3-digit number of the form 1[0-9][1-9]
	// - starts with '@', '.', a Unicode letter or a digit
	// - does not match the media folder structure root pattern
	def out = candidates.unique{ normalize(it) }.findAll{ name ->
		name.length() >= 2 &&
			!(name ==~ /[1][0-9][1-9]/) &&
			(name =~ /^[@.\p{L}\p{Digit}]/) &&
			!MediaDetection.releaseInfo.structureRootPattern.matcher(name).matches()
	}

	return out
}
|
|
|
|
|
|
2013-11-20 05:07:25 -05:00
|
|
|
|
/**
 * Sort items by a String key, ignoring case. Items whose keys are equal
 * (ignoring case) collapse into one entry; the item added last wins.
 *
 * @param list         the items to sort
 * @param keyFunction  closure that maps an item to its sort key
 * @return the items in key order
 */
def treeSort(list, keyFunction) {
	def byKey = new TreeMap(String.CASE_INSENSITIVE_ORDER)
	for (item in list) {
		byKey[keyFunction(item)] = item
	}
	return byKey.values()
}
|
|
|
|
|
|
2014-04-18 15:41:39 -04:00
|
|
|
|
/**
 * Read a delimited UTF-8 text file into a map.
 *
 * @param f           the file to read
 * @param delim       column delimiter (regex)
 * @param keyIndex    index of the column used as map key
 * @param valueIndex  index (or indices) of the column(s) used as map value;
 *                    the value is whatever tryQuietly yields for line[valueIndex]
 * @return map of key to value in file order; empty if f is not a file
 */
def csv(f, delim, keyIndex, valueIndex) {
	def values = [:]

	if (!f.isFile()) {
		return values
	}

	f.splitEachLine(delim, 'UTF-8') { columns ->
		def key = columns[keyIndex]
		values.put(key, tryQuietly{ columns[valueIndex] })
	}
	return values
}
|
|
|
|
|
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
2016-11-19 12:12:09 -05:00
|
|
|
|
// ------------------------------ BUILD MOVIE INDEX ------------------------------ //
|
|
|
|
|
|
2016-03-12 08:46:42 -05:00
|
|
|
|
|
|
|
|
|
// in 'no-index' mode the script ends here and none of the indexes below are built
def skipIndex = (_args.mode == /no-index/)
if (skipIndex) {
	return
}
|
|
|
|
|
|
2014-03-09 08:50:03 -04:00
|
|
|
|
|
2016-11-19 12:12:09 -05:00
|
|
|
|
// movie candidates from the movie lists, keyed by column 1; values are columns 1-n (m[0], m[1], m[2], ...)
def movies_index = [:]
for (name in ['ancient-movies.txt', 'recent-movies.txt']) {
	movies_index << csv(dir_data.resolve(name), '\t', 1, [1..-1])
}

// rows stored by previous runs: 0 sync time, 1 imdb id, 2 tmdb id, 3 year, 4-n names
def tmdb_txt = dir_data.resolve('tmdb.txt')
def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1])

def tmdb = []
for (m in movies_index.values()) {
	def sync = System.currentTimeMillis()

	// reuse the stored row while it is recent enough (360 days if m[1] < 2000, else 120 days)
	if (tmdb_index.containsKey(m[0])) {
		def age = sync - tmdb_index[m[0]][0].toLong()
		def maxAgeDays = m[1].toInteger() < 2000 ? 360 : 120
		if (age < maxAgeDays * 24 * 60 * 60 * 1000L) {
			tmdb << tmdb_index[m[0]]
			continue
		}
	}

	try {
		def info = WebServices.TheMovieDB.getMovieInfo("tt${m[0]}", Locale.ENGLISH, true)

		// treated like a failed lookup, see the catch block below
		if (info.votes <= 1 || info.rating <= 2) {
			throw new IllegalArgumentException('Movie not popular enough: ' + info)
		}

		def names = [info.name, info.originalName] + info.alternativeTitles

		// one row per distinct year (release year of the lookup result and year from the movie list)
		def years = [info?.released?.year, m[1]].findResults{ it?.toInteger() }.unique()
		for (y in years) {
			def row = [sync, m[0].pad(7), info.id.pad(7), y.pad(4)] + names
			log.info "Update ${m[0..2]}: $row"
			tmdb << row
		}
	} catch(IllegalArgumentException | FileNotFoundException e) {
		printException(e)

		// store a row with tmdb id 0 so that the entry is filtered out of the index below
		def row = [sync, m[0].pad(7), 0, m[1], m[2]]
		log.info "[BAD] Update $m: $row"
		tmdb << row
	}
}

// persist all rows for the next run
tmdb*.join('\t').join('\n').saveAs(tmdb_txt)

// build index rows: imdb id, tmdb id, year, names...
def movies = tmdb.findResults{ row ->
	def ity = row[1..3] // imdb id, tmdb id, year
	def names = getNamePermutations(row[4..-1]).findAll{ isValidMovieName(it) }

	def valid = ity[0].toInteger() > 0 && ity[1].toInteger() > 0 && names.size() > 0
	return valid ? ity + names : null
}

// sort by "<first name> <year>" (ignoring case)
movies = treeSort(movies, { it[3, 2].join(' ') })

// sanity check
if (movies.size() < 20000) {
	die('Movie index sanity failed:' + movies.size())
}

pack(moviedb_out, movies*.join('\t'))
|
2013-11-20 05:07:25 -05:00
|
|
|
|
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
2016-11-19 12:12:09 -05:00
|
|
|
|
// ------------------------------ BUILD SERIES INDEX ------------------------------ //
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
2013-11-20 05:07:25 -05:00
|
|
|
|
|
2018-03-12 05:46:14 -04:00
|
|
|
|
// rows stored by previous runs, keyed by series id
def tvdb_txt = dir_data.resolve('tvdb.txt')
def tvdb = [:]

if (tvdb_txt.exists()) {
	tvdb_txt.eachLine('UTF-8'){ text ->
		// columns: 0 time, 1 id, 2 imdbid, 3 rating, 4 votes, 5 year, 6-n names
		def line = text.split('\t') as List
		def id = line[1] as Integer
		def head = [line[0] as Long, id, line[2], line[3] as Float, line[4] as Float, line[5] as Integer]
		tvdb.put(id, head + line[6..<line.size()])
	}
}

// series id => [id, time] for every <Series> element in the updates file, sorted by id
def tvdb_updates = [:] as TreeMap
dir_data.resolve('updates_all.xml').eachLine('UTF-8'){ text ->
	def matcher = (text =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>')
	while(matcher.find()) {
		def id = matcher.group(1) as Integer
		def time = matcher.group(2) as Integer
		tvdb_updates[id] = [id: id, time: time]
	}
}

// drop known junk entries
[219901, 256135].each{ tvdb_updates.remove(it) }

tvdb_updates.values().each{ update ->
	// nothing to do if the stored row is at least as recent as the update
	def known = tvdb[update.id]
	if (known != null && update.time <= known[0]) {
		return
	}

	try {
		retry(2, 60000) {
			def seriesInfo = TheTVDB.getSeriesInfo(update.id, Locale.ENGLISH)
			def imdbid = seriesInfo.imdbId ?: ''

			def seriesNames = []
			seriesNames += seriesInfo.name
			seriesNames += seriesInfo.aliasNames

			def rating = seriesInfo.rating ?: 0
			def votes = seriesInfo.ratingCount ?: 0
			def year = seriesInfo.startDate?.year ?: 0

			// series with an IMDb id: add the names from OMDb and prefer its votes / rating
			if (imdbid) tryLogCatch {
				def omdbInfo = OMDb.getMovieInfo(new Movie(imdbid.match(/tt(\d+)/) as int))

				seriesNames += omdbInfo.name
				seriesNames += omdbInfo.originalName
				seriesNames += omdbInfo.alternativeTitles

				if (omdbInfo.votes && omdbInfo.rating) {
					votes = omdbInfo.votes as int
					rating = omdbInfo.rating as double

					// scrape additional alias titles from the series web page (not yet available via the API)
					if (votes >= 60 && rating >= 4) {
						def jsoup = org.jsoup.Jsoup.connect("https://www.thetvdb.com/series/${seriesInfo.slug}").get()
						def intlseries = jsoup.select('#translations div.change_translation_text')*.attr('data-title')*.trim()
						log.fine "Scraped data $intlseries for series $seriesNames"
						seriesNames += intlseries
					}
				}
			}

			def data = [update.time, update.id, imdbid, rating, votes, year] + seriesNames.findAll{ it != null && it.length() > 0 }
			log.info "Update $update => $data"

			tvdb.put(update.id, data)
		}
	}
	catch(Throwable e) {
		printException(e)

		// store a row without names; rows without names are ignored when the index is built
		def data = [update.time, update.id, '', 0, 0, 0]
		tvdb.put(update.id, data)
		log.info "[BAD] Update $update => $data"
	}
}

// remove entries that have become invalid (no longer part of the updates)
tvdb.keySet().toList().findAll{ tvdb_updates[it] == null }.each{ id ->
	log.finest "Invalid ID found: ${tvdb[id]}"
	tvdb.remove(id)
}

// persist all rows for the next run (tabs inside fields are removed because tab is the column separator)
tvdb.values().findResults{ row ->
	row.collect{ field -> field.toString().replace('\t', '').trim() }.join('\t')
}.join('\n').saveAs(tvdb_txt)

// additional custom mappings: primary series name => extra alias names
def extraAliasNames = csv(dir_release.resolve('add-series-alias.txt'), '\t', 0, [1..-1])

def thetvdb_index = []
tvdb.values().each{ r ->
	def tvdb_id = r[1]
	def rating = r[3]
	def votes = r[4]
	def year = r[5]
	def names = r[6..<r.size()]

	// ignore invalid entries
	if (names.isEmpty()) {
		return
	}

	// add "<primary name> (<year>)" right after the primary name
	if (year > 0) {
		names.add(1, names[0].replaceTrailingBrackets() + " ($year)")
	}

	def alias = extraAliasNames[names[0]]
	if (alias) {
		log.finest "Add alias ${names[0]} => ${alias}"
		names += alias
	}

	// always include if alias has been manually added, otherwise require a minimum of votes / rating
	def popular = (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)
	if (alias != null || popular) {
		getNamePermutations(names).each{ n ->
			thetvdb_index << [tvdb_id, n]
		}
	}
}

// normalize whitespace, then drop placeholder names, names with 6+ digit numbers,
// names starting or ending with '*' and names shorter than 2 characters
thetvdb_index = thetvdb_index.findResults{ entry ->
	[entry[0] as Integer, entry[1].replaceAll(/\s+/, ' ').trim()]
}.findAll{ entry ->
	def name = entry[1]
	def rejected = (name =~ /(?i:Duplicate|Series.Not.Permitted|Episode.#\d+.\d+)/) || (name =~ /\d{6,}/) || name.startsWith('*') || name.endsWith('*') || name.length() < 2
	return !rejected
}
thetvdb_index = thetvdb_index.sort{ a, b -> a[0] <=> b[0] }

// join and sort: one line per series id, names deduplicated ignoring case
def thetvdb_txt = thetvdb_index.groupBy{ it[0] }.findResults{ id, entries ->
	([id.pad(6)] + entries*.getAt(1).unique{ it.toLowerCase() }).join('\t')
}

// sanity check
if (thetvdb_txt.size() < 4000) {
	die('TheTVDB index sanity failed: ' + thetvdb_txt.size())
}

pack(thetvdb_out, thetvdb_txt)
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
|
|
|
|
|
2016-11-19 12:12:09 -05:00
|
|
|
|
// ------------------------------ BUILD OSDB INDEX ------------------------------ //
|
2013-11-20 05:07:25 -05:00
|
|
|
|
|
2013-08-10 01:23:14 -04:00
|
|
|
|
|
2015-05-24 18:54:59 -04:00
|
|
|
|
// collect the usable rows of the OSDB dump
def osdb = []

dir_data.resolve('osdb.txt').eachLine('UTF-8'){
	def fields = it.split(/\t/)*.trim()

	// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
	if (fields.size() == 6 && fields[1] ==~ /\d+/ && fields[3] ==~ /\d{4}/) {
		// movies and tv series only, with a valid name, year >= 1970 and priority >= 500
		if (fields[4] ==~ /movie|tv.series/ && isValidMovieName(fields[2]) && (fields[3] as int) >= 1970 && (fields[5] as int) >= 500) {
			// 0 imdbid, 1 name, 2 year, 3 kind, 4 priority
			osdb << [fields[1] as int, fields[2], fields[3] as int, fields[4] == /movie/ ? 'm' : fields[4] == /tv series/ ? 's' : '?', fields[5] as int]
		}
	}
}

// sort reverse by score
osdb.sort{ a, b -> b[4] <=> a[4] }

// reset score/priority because it's currently not used
osdb*.set(4, 0)

// map series rows by imdbid (7-digit padded, without the 'tt' prefix)
def tvdb_index = tvdb.values().findAll{ it[2] =~ /tt(\d+)/ }.collectEntries{ [it[2].substring(2).pad(7), it] }

// collect final output data
osdb = osdb.findResults{
	def names = [it[1]]
	if (it[3] == 'm') {
		// tmdb rows: 0 sync time, 1 imdb id, 2 tmdb id, 3 year, 4-n names
		def tmdb_entry = tmdb_index[it[0].pad(7)]
		if (tmdb_entry != null && tmdb_entry.size() > 4) {
			names += tmdb_entry[4..-1]
		}
	} else if (it[3] == 's') {
		// tvdb rows: 0 time, 1 id, 2 imdbid, 3 rating, 4 votes, 5 year, 6-n names
		// (names start at index 6; index 5 is the year and must not be added as a name)
		def tvdb_entry = tvdb_index[it[0].pad(7)]
		if (tvdb_entry != null && tvdb_entry.size() > 6) {
			names += tvdb_entry[6..-1]
		}
	}

	// 0 kind, 1 score, 2 imdbid, 3 year, 4-n names
	return [it[3], it[4], it[0], it[2]] + names.unique()
}

// sanity check
if (osdb.size() < 15000) { die('OSDB index sanity failed:' + osdb.size()) }

pack(osdb_out, osdb*.join('\t'))
|
2015-05-25 04:41:33 -04:00
|
|
|
|
|
|
|
|
|
|
2016-11-19 12:12:09 -05:00
|
|
|
|
// ------------------------------ BUILD ANIDB INDEX ------------------------------ //
|
2015-05-25 04:41:33 -04:00
|
|
|
|
|
|
|
|
|
|
2019-05-03 09:38:35 -04:00
|
|
|
|
// feed the local copy of the anime titles dump into the client cache, then read the titles via the client
def ac = new AnidbClient('filebot', 6)
ac.getCache('root').put('anime-titles.dat.gz', dir_data.resolve('anidb.txt').bytes)

def anidb = ac.animeTitles as List
def animeExcludes = [] as Set

// exclude anime movies from anime index:
// entries mapped to tvdbid="movie" or carrying an IMDb id (imdbid="tt<digits>")
// NOTE(review): assumes every such line also carries an anidbid attribute - confirm against anime-list.xml
dir_data.resolve('anime-list.xml').eachLine('UTF-8') {
	if (it =~ /tvdbid="movie"/ || it =~ /imdbid="tt\d+"/) {
		animeExcludes << it.match(/anidbid="(\d+)"/).toInteger()
	}
}

def anidb_index = anidb.findResults{
	if (animeExcludes.contains(it.id))
		return null

	// collapse whitespace and unify the various apostrophe characters
	def names = it.effectiveNames*.replaceAll(/\s+/, ' ')*.trim()*.replaceAll(/['`´‘’ʻ]+/, /'/)
	names = getNamePermutations(names)

	// id (5-digit padded) followed by at most 4 names
	return names.empty ? null : [it.id.pad(5)] + names.take(4)
}

// join and sort
def anidb_txt = anidb_index.findResults{ row -> row.join('\t') }.sort().unique()

// sanity check
if (anidb_txt.size() < 8000 || animeExcludes.size() < 500) { die('AniDB index sanity failed:' + anidb_txt.size()) }

pack(anidb_out, anidb_txt)
|