mirror of
https://github.com/mitb-archive/filebot
synced 2024-11-15 13:55:03 -05:00
Refactor BuildData.groovy
This commit is contained in:
parent
592e3ae608
commit
a424b55b8e
@ -7,11 +7,11 @@ import org.tukaani.xz.*
|
|||||||
/* ------------------------------------------------------------------------- */
|
/* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
def dir_root = ".."
|
def dir_root = ('..' as File).getCanonicalFile()
|
||||||
def dir_website = "${dir_root}/website"
|
def dir_website = dir_root.resolve('website')
|
||||||
def dir_data = "${dir_website}/data"
|
def dir_data = dir_website.resolve('data')
|
||||||
|
|
||||||
new File(dir_data).mkdirs()
|
dir_data.mkdirs()
|
||||||
|
|
||||||
// sort and check shared regex collections
|
// sort and check shared regex collections
|
||||||
def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw.githubusercontent.com/filebot/data/master')
|
def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw.githubusercontent.com/filebot/data/master')
|
||||||
@ -23,7 +23,7 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw
|
|||||||
'series-mappings.txt'
|
'series-mappings.txt'
|
||||||
].each{
|
].each{
|
||||||
def input = new URL(dir_data_master + '/' + it)
|
def input = new URL(dir_data_master + '/' + it)
|
||||||
def output = new File("${dir_data}/${it}")
|
def output = dir_data.resolve(it)
|
||||||
|
|
||||||
log.fine "Fetch $input"
|
log.fine "Fetch $input"
|
||||||
def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER)
|
def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER)
|
||||||
@ -40,7 +40,8 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw
|
|||||||
|
|
||||||
|
|
||||||
def reviews = []
|
def reviews = []
|
||||||
new File("${dir_root}/reviews.tsv").eachLine('UTF-8'){
|
|
||||||
|
dir_root.resolve('reviews.tsv').eachLine('UTF-8'){
|
||||||
def s = it.split(/\t/, 3)*.trim()
|
def s = it.split(/\t/, 3)*.trim()
|
||||||
reviews << [user: s[0], date: s[1], text: s[2]]
|
reviews << [user: s[0], date: s[1], text: s[2]]
|
||||||
}
|
}
|
||||||
@ -48,17 +49,17 @@ reviews = reviews.sort{ it.date }
|
|||||||
|
|
||||||
def json = new groovy.json.JsonBuilder()
|
def json = new groovy.json.JsonBuilder()
|
||||||
json.call(reviews as List)
|
json.call(reviews as List)
|
||||||
json.toPrettyString().saveAs("${dir_website}/reviews.json")
|
json.toPrettyString().saveAs(dir_website.resolve('reviews.json'))
|
||||||
log.info "Reviews: " + reviews.size()
|
log.info "Reviews: " + reviews.size()
|
||||||
|
|
||||||
|
|
||||||
/* ------------------------------------------------------------------------- */
|
/* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
def moviedb_out = new File("${dir_data}/moviedb.txt")
|
def moviedb_out = dir_data.resolve('moviedb.txt')
|
||||||
def thetvdb_out = new File("${dir_data}/thetvdb.txt")
|
def thetvdb_out = dir_data.resolve('thetvdb.txt')
|
||||||
def anidb_out = new File("${dir_data}/anidb.txt")
|
def anidb_out = dir_data.resolve('anidb.txt')
|
||||||
def osdb_out = new File("${dir_data}/osdb.txt")
|
def osdb_out = dir_data.resolve('osdb.txt')
|
||||||
|
|
||||||
|
|
||||||
def pack(file, lines) {
|
def pack(file, lines) {
|
||||||
@ -157,7 +158,7 @@ def omdb = new TreeMap()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def tmdb_txt = new File('tmdb.txt')
|
def tmdb_txt = 'tmdb.txt' as File
|
||||||
def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1])
|
def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1])
|
||||||
|
|
||||||
def tmdb = []
|
def tmdb = []
|
||||||
@ -207,19 +208,19 @@ pack(moviedb_out, movies*.join('\t'))
|
|||||||
|
|
||||||
|
|
||||||
// BUILD tvdb index
|
// BUILD tvdb index
|
||||||
def tvdb_txt = new File('tvdb.txt')
|
def tvdb_txt = 'tvdb.txt' as File
|
||||||
def tvdb = [:]
|
def tvdb = [:]
|
||||||
|
|
||||||
if (tvdb_txt.exists()) {
|
if (tvdb_txt.exists()) {
|
||||||
tvdb_txt.eachLine('UTF-8'){
|
tvdb_txt.eachLine('UTF-8'){
|
||||||
def line = it.split('\t').toList()
|
def line = it.split('\t') as List
|
||||||
def names = line.subList(5, line.size())
|
|
||||||
tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float] + names)
|
tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float, line[5] as Integer] + line[6..<line.size()])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def tvdb_updates = [:] as TreeMap
|
def tvdb_updates = [:] as TreeMap
|
||||||
new File('updates_all.xml').eachLine('UTF-8'){
|
('updates_all.xml' as File).eachLine('UTF-8'){
|
||||||
def m = (it =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>')
|
def m = (it =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>')
|
||||||
while(m.find()) {
|
while(m.find()) {
|
||||||
def id = m.group(1) as Integer
|
def id = m.group(1) as Integer
|
||||||
@ -239,19 +240,19 @@ tvdb_updates.values().each{ update ->
|
|||||||
retry(2, 60000) {
|
retry(2, 60000) {
|
||||||
def seriesNames = []
|
def seriesNames = []
|
||||||
def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml")
|
def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml")
|
||||||
def imdbid = xml.Series.IMDB_ID.text()
|
def imdbid = any{ xml.Series.IMDB_ID.text().match(/tt\d+/) }{ '' }
|
||||||
|
|
||||||
seriesNames += xml.Series.SeriesName.text()
|
seriesNames += xml.Series.SeriesName.text()
|
||||||
|
|
||||||
def rating = tryQuietly{ xml.Series.Rating.text().toFloat() }
|
def rating = any{ xml.Series.Rating.text().toFloat() }{ 0 }
|
||||||
def votes = tryQuietly{ xml.Series.RatingCount.text().toFloat() }
|
def votes = any{ xml.Series.RatingCount.text().toFloat() }{ 0 }
|
||||||
|
def year = any{ xml.Series.FirstAired.text().match(/\d{4}/) as Integer }{ 0 }
|
||||||
|
|
||||||
// only retrieve additional data for reasonably popular shows
|
// only retrieve additional data for reasonably popular shows
|
||||||
if (votes >= 5 && rating >= 4) {
|
if (imdbid && votes >= 5 && rating >= 4) {
|
||||||
tryLogCatch{
|
tryLogCatch{
|
||||||
if (imdbid =~ /tt(\d+)/) {
|
|
||||||
seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName()
|
seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName()
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// scrape extra alias titles from webpage (not supported yet by API)
|
// scrape extra alias titles from webpage (not supported yet by API)
|
||||||
def html = Cache.getCache('thetvdb_series_page', CacheType.Persistent).text(update.id) {
|
def html = Cache.getCache('thetvdb_series_page', CacheType.Persistent).text(update.id) {
|
||||||
@ -274,14 +275,14 @@ tvdb_updates.values().each{ update ->
|
|||||||
seriesNames += intlseries
|
seriesNames += intlseries
|
||||||
}
|
}
|
||||||
|
|
||||||
def data = [update.time, update.id, imdbid, rating ?: 0, votes ?: 0] + seriesNames.findAll{ it != null && it.length() > 0 }
|
def data = [update.time, update.id, imdbid, rating, votes, year] + seriesNames.findAll{ it != null && it.length() > 0 }
|
||||||
tvdb.put(update.id, data)
|
tvdb.put(update.id, data)
|
||||||
log.info "Update $update => $data"
|
log.info "Update $update => $data"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch(Throwable e) {
|
catch(Throwable e) {
|
||||||
printException(e)
|
printException(e)
|
||||||
def data = [update.time, update.id, '', 0, 0]
|
def data = [update.time, update.id, '', 0, 0, 0]
|
||||||
tvdb.put(update.id, data)
|
tvdb.put(update.id, data)
|
||||||
log.info "[BAD] Update $update => $data"
|
log.info "[BAD] Update $update => $data"
|
||||||
}
|
}
|
||||||
@ -291,21 +292,27 @@ tvdb_updates.values().each{ update ->
|
|||||||
// remove entries that have become invalid
|
// remove entries that have become invalid
|
||||||
tvdb.keySet().toList().each{ id ->
|
tvdb.keySet().toList().each{ id ->
|
||||||
if (tvdb_updates[id] == null) {
|
if (tvdb_updates[id] == null) {
|
||||||
log.info "Invalid ID found: ${tvdb[id]}"
|
log.finest "Invalid ID found: ${tvdb[id]}"
|
||||||
tvdb.remove(id)
|
tvdb.remove(id)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tvdb.values().findResults{ it.collect{ it.toString().replace('\t', '').trim() }.join('\t') }.join('\n').saveAs(tvdb_txt)
|
tvdb.values().findResults{ it.collect{ it.toString().replace('\t', '').trim() }.join('\t') }.join('\n').saveAs(tvdb_txt)
|
||||||
|
|
||||||
// additional custom mappings
|
// additional custom mappings
|
||||||
def extraAliasNames = csv("${dir_data}/add-series-alias.txt")
|
def extraAliasNames = csv(dir_data.resolve('add-series-alias.txt'), '\t', 0, [1..-1])
|
||||||
|
|
||||||
def thetvdb_index = []
|
def thetvdb_index = []
|
||||||
tvdb.values().each{ r ->
|
tvdb.values().each{ r ->
|
||||||
def tvdb_id = r[1]
|
def tvdb_id = r[1]
|
||||||
def rating = r[3]
|
def rating = r[3]
|
||||||
def votes = r[4]
|
def votes = r[4]
|
||||||
def names = r.subList(5, r.size())
|
def year = r[5]
|
||||||
|
def names = r[6..<r.size()]
|
||||||
|
|
||||||
|
// ignore invalid entries
|
||||||
|
if (names.isEmpty()) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
def alias = extraAliasNames[names[0]]
|
def alias = extraAliasNames[names[0]]
|
||||||
if (alias) {
|
if (alias) {
|
||||||
@ -313,7 +320,12 @@ tvdb.values().each{ r ->
|
|||||||
names += alias
|
names += alias
|
||||||
}
|
}
|
||||||
|
|
||||||
if (alias !=null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) {
|
if (year > 0 && !names[0].endsWith(" ($year)")) {
|
||||||
|
names.add(1, names[0] + " ($year)")
|
||||||
|
}
|
||||||
|
|
||||||
|
// always include if alias has been manually added
|
||||||
|
if (alias != null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) {
|
||||||
getNamePermutations(names).each{ n ->
|
getNamePermutations(names).each{ n ->
|
||||||
thetvdb_index << [tvdb_id, n]
|
thetvdb_index << [tvdb_id, n]
|
||||||
}
|
}
|
||||||
@ -337,7 +349,7 @@ pack(thetvdb_out, thetvdb_txt)
|
|||||||
// BUILD osdb index
|
// BUILD osdb index
|
||||||
def osdb = []
|
def osdb = []
|
||||||
|
|
||||||
new File('osdb.txt').eachLine('UTF-8'){
|
('osdb.txt' as File).eachLine('UTF-8'){
|
||||||
def fields = it.split(/\t/)*.trim()
|
def fields = it.split(/\t/)*.trim()
|
||||||
|
|
||||||
// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
|
// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
|
||||||
@ -389,7 +401,7 @@ def anidb = new AnidbClient('filebot', 6).getAnimeTitles() as List
|
|||||||
def animeExcludes = [] as Set
|
def animeExcludes = [] as Set
|
||||||
|
|
||||||
// exclude anime movies from anime index
|
// exclude anime movies from anime index
|
||||||
new File('anime-list.xml').eachLine('UTF-8') {
|
('anime-list.xml' as File).eachLine('UTF-8') {
|
||||||
if (it =~ /tvdbid="movie"/ || it =~ /imdbid="ttd\+"/) {
|
if (it =~ /tvdbid="movie"/ || it =~ /imdbid="ttd\+"/) {
|
||||||
animeExcludes << it.match(/anidbid="(\d+)"/).toInteger()
|
animeExcludes << it.match(/anidbid="(\d+)"/).toInteger()
|
||||||
}
|
}
|
||||||
|
@ -80,14 +80,14 @@ public class ExpressionFormatFunctions {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static List<String> readLines(String path) throws IOException {
|
public static List<String> readLines(String path) throws IOException {
|
||||||
return FileUtilities.readLines(getFile(path));
|
return FileUtilities.readLines(getUserFile(path));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Object readXml(String path) throws Exception {
|
public static Object readXml(String path) throws Exception {
|
||||||
return new XmlSlurper().parse(getFile(path));
|
return new XmlSlurper().parse(getUserFile(path));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static File getFile(String path) {
|
public static File getUserFile(String path) {
|
||||||
File f = new File(path);
|
File f = new File(path);
|
||||||
|
|
||||||
if (!f.isAbsolute()) {
|
if (!f.isAbsolute()) {
|
||||||
|
Loading…
Reference in New Issue
Block a user