diff --git a/build-data/BuildData.groovy b/build-data/BuildData.groovy index 9365dd9d..4ee1dd32 100755 --- a/build-data/BuildData.groovy +++ b/build-data/BuildData.groovy @@ -7,11 +7,11 @@ import org.tukaani.xz.* /* ------------------------------------------------------------------------- */ -def dir_root = ".." -def dir_website = "${dir_root}/website" -def dir_data = "${dir_website}/data" +def dir_root = ('..' as File).getCanonicalFile() +def dir_website = dir_root.resolve('website') +def dir_data = dir_website.resolve('data') -new File(dir_data).mkdirs() +dir_data.mkdirs() // sort and check shared regex collections def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw.githubusercontent.com/filebot/data/master') @@ -23,7 +23,7 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw 'series-mappings.txt' ].each{ def input = new URL(dir_data_master + '/' + it) - def output = new File("${dir_data}/${it}") + def output = dir_data.resolve(it) log.fine "Fetch $input" def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER) @@ -40,7 +40,8 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw def reviews = [] -new File("${dir_root}/reviews.tsv").eachLine('UTF-8'){ + +dir_root.resolve('reviews.tsv').eachLine('UTF-8'){ def s = it.split(/\t/, 3)*.trim() reviews << [user: s[0], date: s[1], text: s[2]] } @@ -48,17 +49,17 @@ reviews = reviews.sort{ it.date } def json = new groovy.json.JsonBuilder() json.call(reviews as List) -json.toPrettyString().saveAs("${dir_website}/reviews.json") +json.toPrettyString().saveAs(dir_website.resolve('reviews.json')) log.info "Reviews: " + reviews.size() /* ------------------------------------------------------------------------- */ -def moviedb_out = new File("${dir_data}/moviedb.txt") -def thetvdb_out = new File("${dir_data}/thetvdb.txt") -def anidb_out = new File("${dir_data}/anidb.txt") -def osdb_out = new File("${dir_data}/osdb.txt") +def moviedb_out = dir_data.resolve('moviedb.txt') +def thetvdb_out = dir_data.resolve('thetvdb.txt') +def anidb_out = dir_data.resolve('anidb.txt') +def osdb_out = dir_data.resolve('osdb.txt') def pack(file, lines) { @@ -157,7 +158,7 @@ def omdb = new TreeMap() } -def tmdb_txt = new File('tmdb.txt') +def tmdb_txt = 'tmdb.txt' as File def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1]) def tmdb = [] @@ -207,19 +208,19 @@ pack(moviedb_out, movies*.join('\t')) // BUILD tvdb index -def tvdb_txt = new File('tvdb.txt') +def tvdb_txt = 'tvdb.txt' as File def tvdb = [:] if (tvdb_txt.exists()) { tvdb_txt.eachLine('UTF-8'){ - def line = it.split('\t').toList() - def names = line.subList(5, line.size()) - tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float] + names) + def line = it.split('\t') as List + + tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float, line[5] as Integer] + line[6..(\\d+)') while(m.find()) { def id = m.group(1) as Integer @@ -239,18 +240,18 @@ tvdb_updates.values().each{ update -> retry(2, 60000) { def seriesNames = [] def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml") - def imdbid = xml.Series.IMDB_ID.text() + def imdbid = any{ xml.Series.IMDB_ID.text().match(/tt\d+/) }{ '' } + seriesNames += xml.Series.SeriesName.text() - def rating = tryQuietly{ xml.Series.Rating.text().toFloat() } - def votes = tryQuietly{ xml.Series.RatingCount.text().toFloat() } + def rating = any{ xml.Series.Rating.text().toFloat() }{ 0 } + def votes = any{ xml.Series.RatingCount.text().toFloat() }{ 0 } + def year = any{ xml.Series.FirstAired.text().match(/\d{4}/) as Integer }{ 0 } // only retrieve additional data for reasonably popular shows - if (votes >= 5 && rating >= 4) { + if (imdbid && votes >= 5 && rating >= 4) { tryLogCatch{ - if (imdbid =~ /tt(\d+)/) { - seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName() - } + seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName() } // scrape extra alias titles from webpage (not supported yet by API) @@ -274,14 +275,14 @@ tvdb_updates.values().each{ update -> seriesNames += intlseries } - def data = [update.time, update.id, imdbid, rating ?: 0, votes ?: 0] + seriesNames.findAll{ it != null && it.length() > 0 } + def data = [update.time, update.id, imdbid, rating, votes, year] + seriesNames.findAll{ it != null && it.length() > 0 } tvdb.put(update.id, data) log.info "Update $update => $data" } } catch(Throwable e) { printException(e) - def data = [update.time, update.id, '', 0, 0] + def data = [update.time, update.id, '', 0, 0, 0] tvdb.put(update.id, data) log.info "[BAD] Update $update => $data" } @@ -291,21 +292,27 @@ tvdb_updates.values().each{ update -> // remove entries that have become invalid tvdb.keySet().toList().each{ id -> if (tvdb_updates[id] == null) { - log.info "Invalid ID found: ${tvdb[id]}" + log.finest "Invalid ID found: ${tvdb[id]}" tvdb.remove(id) } } tvdb.values().findResults{ it.collect{ it.toString().replace('\t', '').trim() }.join('\t') }.join('\n').saveAs(tvdb_txt) // additional custom mappings -def extraAliasNames = csv("${dir_data}/add-series-alias.txt") +def extraAliasNames = csv(dir_data.resolve('add-series-alias.txt'), '\t', 0, [1..-1]) def thetvdb_index = [] tvdb.values().each{ r -> def tvdb_id = r[1] def rating = r[3] def votes = r[4] - def names = r.subList(5, r.size()) + def year = r[5] + def names = r[6.. names += alias } - if (alias !=null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) { + if (year > 0 && !names[0].endsWith(" ($year)")) { + names.add(1, names[0] + " ($year)") + } + + // always include if alias has been manually added + if (alias != null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) { getNamePermutations(names).each{ n -> thetvdb_index << [tvdb_id, n] } @@ -337,7 +349,7 @@ pack(thetvdb_out, thetvdb_txt) // BUILD osdb index def osdb = [] -new File('osdb.txt').eachLine('UTF-8'){ +('osdb.txt' as File).eachLine('UTF-8'){ def fields = it.split(/\t/)*.trim() // 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority @@ -389,7 +401,7 @@ def anidb = new AnidbClient('filebot', 6).getAnimeTitles() as List def animeExcludes = [] as Set // exclude anime movies from anime index -new File('anime-list.xml').eachLine('UTF-8') { +('anime-list.xml' as File).eachLine('UTF-8') { if (it =~ /tvdbid="movie"/ || it =~ /imdbid="ttd\+"/) { animeExcludes << it.match(/anidbid="(\d+)"/).toInteger() } diff --git a/source/net/filebot/format/ExpressionFormatFunctions.java b/source/net/filebot/format/ExpressionFormatFunctions.java index 701613e1..2b3c80cf 100644 --- a/source/net/filebot/format/ExpressionFormatFunctions.java +++ b/source/net/filebot/format/ExpressionFormatFunctions.java @@ -80,14 +80,14 @@ public class ExpressionFormatFunctions { } public static List readLines(String path) throws IOException { - return FileUtilities.readLines(getFile(path)); + return FileUtilities.readLines(getUserFile(path)); } public static Object readXml(String path) throws Exception { - return new XmlSlurper().parse(getFile(path)); + return new XmlSlurper().parse(getUserFile(path)); } - public static File getFile(String path) { + public static File getUserFile(String path) { File f = new File(path); if (!f.isAbsolute()) {