1
0
mirror of https://github.com/mitb-archive/filebot synced 2024-12-23 00:08:51 -05:00

Refactor BuildData.groovy

This commit is contained in:
Reinhard Pointner 2016-11-19 20:16:00 +08:00
parent 592e3ae608
commit a424b55b8e
2 changed files with 47 additions and 35 deletions

View File

@ -7,11 +7,11 @@ import org.tukaani.xz.*
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
def dir_root = ".." def dir_root = ('..' as File).getCanonicalFile()
def dir_website = "${dir_root}/website" def dir_website = dir_root.resolve('website')
def dir_data = "${dir_website}/data" def dir_data = dir_website.resolve('data')
new File(dir_data).mkdirs() dir_data.mkdirs()
// sort and check shared regex collections // sort and check shared regex collections
def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw.githubusercontent.com/filebot/data/master') def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw.githubusercontent.com/filebot/data/master')
@ -23,7 +23,7 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw
'series-mappings.txt' 'series-mappings.txt'
].each{ ].each{
def input = new URL(dir_data_master + '/' + it) def input = new URL(dir_data_master + '/' + it)
def output = new File("${dir_data}/${it}") def output = dir_data.resolve(it)
log.fine "Fetch $input" log.fine "Fetch $input"
def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER) def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER)
@ -40,7 +40,8 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw
def reviews = [] def reviews = []
new File("${dir_root}/reviews.tsv").eachLine('UTF-8'){
dir_root.resolve('reviews.tsv').eachLine('UTF-8'){
def s = it.split(/\t/, 3)*.trim() def s = it.split(/\t/, 3)*.trim()
reviews << [user: s[0], date: s[1], text: s[2]] reviews << [user: s[0], date: s[1], text: s[2]]
} }
@ -48,17 +49,17 @@ reviews = reviews.sort{ it.date }
def json = new groovy.json.JsonBuilder() def json = new groovy.json.JsonBuilder()
json.call(reviews as List) json.call(reviews as List)
json.toPrettyString().saveAs("${dir_website}/reviews.json") json.toPrettyString().saveAs(dir_website.resolve('reviews.json'))
log.info "Reviews: " + reviews.size() log.info "Reviews: " + reviews.size()
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
def moviedb_out = new File("${dir_data}/moviedb.txt") def moviedb_out = dir_data.resolve('moviedb.txt')
def thetvdb_out = new File("${dir_data}/thetvdb.txt") def thetvdb_out = dir_data.resolve('thetvdb.txt')
def anidb_out = new File("${dir_data}/anidb.txt") def anidb_out = dir_data.resolve('anidb.txt')
def osdb_out = new File("${dir_data}/osdb.txt") def osdb_out = dir_data.resolve('osdb.txt')
def pack(file, lines) { def pack(file, lines) {
@ -157,7 +158,7 @@ def omdb = new TreeMap()
} }
def tmdb_txt = new File('tmdb.txt') def tmdb_txt = 'tmdb.txt' as File
def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1]) def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1])
def tmdb = [] def tmdb = []
@ -207,19 +208,19 @@ pack(moviedb_out, movies*.join('\t'))
// BUILD tvdb index // BUILD tvdb index
def tvdb_txt = new File('tvdb.txt') def tvdb_txt = 'tvdb.txt' as File
def tvdb = [:] def tvdb = [:]
if (tvdb_txt.exists()) { if (tvdb_txt.exists()) {
tvdb_txt.eachLine('UTF-8'){ tvdb_txt.eachLine('UTF-8'){
def line = it.split('\t').toList() def line = it.split('\t') as List
def names = line.subList(5, line.size())
tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float] + names) tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float, line[5] as Integer] + line[6..<line.size()])
} }
} }
def tvdb_updates = [:] as TreeMap def tvdb_updates = [:] as TreeMap
new File('updates_all.xml').eachLine('UTF-8'){ ('updates_all.xml' as File).eachLine('UTF-8'){
def m = (it =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>') def m = (it =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>')
while(m.find()) { while(m.find()) {
def id = m.group(1) as Integer def id = m.group(1) as Integer
@ -239,18 +240,18 @@ tvdb_updates.values().each{ update ->
retry(2, 60000) { retry(2, 60000) {
def seriesNames = [] def seriesNames = []
def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml") def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml")
def imdbid = xml.Series.IMDB_ID.text() def imdbid = any{ xml.Series.IMDB_ID.text().match(/tt\d+/) }{ '' }
seriesNames += xml.Series.SeriesName.text() seriesNames += xml.Series.SeriesName.text()
def rating = tryQuietly{ xml.Series.Rating.text().toFloat() } def rating = any{ xml.Series.Rating.text().toFloat() }{ 0 }
def votes = tryQuietly{ xml.Series.RatingCount.text().toFloat() } def votes = any{ xml.Series.RatingCount.text().toFloat() }{ 0 }
def year = any{ xml.Series.FirstAired.text().match(/\d{4}/) as Integer }{ 0 }
// only retrieve additional data for reasonably popular shows // only retrieve additional data for reasonably popular shows
if (votes >= 5 && rating >= 4) { if (imdbid && votes >= 5 && rating >= 4) {
tryLogCatch{ tryLogCatch{
if (imdbid =~ /tt(\d+)/) { seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName()
seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName()
}
} }
// scrape extra alias titles from webpage (not supported yet by API) // scrape extra alias titles from webpage (not supported yet by API)
@ -274,14 +275,14 @@ tvdb_updates.values().each{ update ->
seriesNames += intlseries seriesNames += intlseries
} }
def data = [update.time, update.id, imdbid, rating ?: 0, votes ?: 0] + seriesNames.findAll{ it != null && it.length() > 0 } def data = [update.time, update.id, imdbid, rating, votes, year] + seriesNames.findAll{ it != null && it.length() > 0 }
tvdb.put(update.id, data) tvdb.put(update.id, data)
log.info "Update $update => $data" log.info "Update $update => $data"
} }
} }
catch(Throwable e) { catch(Throwable e) {
printException(e) printException(e)
def data = [update.time, update.id, '', 0, 0] def data = [update.time, update.id, '', 0, 0, 0]
tvdb.put(update.id, data) tvdb.put(update.id, data)
log.info "[BAD] Update $update => $data" log.info "[BAD] Update $update => $data"
} }
@ -291,21 +292,27 @@ tvdb_updates.values().each{ update ->
// remove entries that have become invalid // remove entries that have become invalid
tvdb.keySet().toList().each{ id -> tvdb.keySet().toList().each{ id ->
if (tvdb_updates[id] == null) { if (tvdb_updates[id] == null) {
log.info "Invalid ID found: ${tvdb[id]}" log.finest "Invalid ID found: ${tvdb[id]}"
tvdb.remove(id) tvdb.remove(id)
} }
} }
tvdb.values().findResults{ it.collect{ it.toString().replace('\t', '').trim() }.join('\t') }.join('\n').saveAs(tvdb_txt) tvdb.values().findResults{ it.collect{ it.toString().replace('\t', '').trim() }.join('\t') }.join('\n').saveAs(tvdb_txt)
// additional custom mappings // additional custom mappings
def extraAliasNames = csv("${dir_data}/add-series-alias.txt") def extraAliasNames = csv(dir_data.resolve('add-series-alias.txt'), '\t', 0, [1..-1])
def thetvdb_index = [] def thetvdb_index = []
tvdb.values().each{ r -> tvdb.values().each{ r ->
def tvdb_id = r[1] def tvdb_id = r[1]
def rating = r[3] def rating = r[3]
def votes = r[4] def votes = r[4]
def names = r.subList(5, r.size()) def year = r[5]
def names = r[6..<r.size()]
// ignore invalid entries
if (names.isEmpty()) {
return
}
def alias = extraAliasNames[names[0]] def alias = extraAliasNames[names[0]]
if (alias) { if (alias) {
@ -313,7 +320,12 @@ tvdb.values().each{ r ->
names += alias names += alias
} }
if (alias !=null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) { if (year > 0 && !names[0].endsWith(" ($year)")) {
names.add(1, names[0] + " ($year)")
}
// always include if alias has been manually added
if (alias != null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) {
getNamePermutations(names).each{ n -> getNamePermutations(names).each{ n ->
thetvdb_index << [tvdb_id, n] thetvdb_index << [tvdb_id, n]
} }
@ -337,7 +349,7 @@ pack(thetvdb_out, thetvdb_txt)
// BUILD osdb index // BUILD osdb index
def osdb = [] def osdb = []
new File('osdb.txt').eachLine('UTF-8'){ ('osdb.txt' as File).eachLine('UTF-8'){
def fields = it.split(/\t/)*.trim() def fields = it.split(/\t/)*.trim()
// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority // 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
@ -389,7 +401,7 @@ def anidb = new AnidbClient('filebot', 6).getAnimeTitles() as List
def animeExcludes = [] as Set def animeExcludes = [] as Set
// exclude anime movies from anime index // exclude anime movies from anime index
new File('anime-list.xml').eachLine('UTF-8') { ('anime-list.xml' as File).eachLine('UTF-8') {
if (it =~ /tvdbid="movie"/ || it =~ /imdbid="ttd\+"/) { if (it =~ /tvdbid="movie"/ || it =~ /imdbid="ttd\+"/) {
animeExcludes << it.match(/anidbid="(\d+)"/).toInteger() animeExcludes << it.match(/anidbid="(\d+)"/).toInteger()
} }

View File

@ -80,14 +80,14 @@ public class ExpressionFormatFunctions {
} }
public static List<String> readLines(String path) throws IOException { public static List<String> readLines(String path) throws IOException {
return FileUtilities.readLines(getFile(path)); return FileUtilities.readLines(getUserFile(path));
} }
public static Object readXml(String path) throws Exception { public static Object readXml(String path) throws Exception {
return new XmlSlurper().parse(getFile(path)); return new XmlSlurper().parse(getUserFile(path));
} }
public static File getFile(String path) { public static File getUserFile(String path) {
File f = new File(path); File f = new File(path);
if (!f.isAbsolute()) { if (!f.isAbsolute()) {