Refactor BuildData.groovy

This commit is contained in:
Reinhard Pointner 2016-11-19 20:16:00 +08:00
parent 592e3ae608
commit a424b55b8e
2 changed files with 47 additions and 35 deletions

View File

@ -7,11 +7,11 @@ import org.tukaani.xz.*
/* ------------------------------------------------------------------------- */
def dir_root = ".."
def dir_website = "${dir_root}/website"
def dir_data = "${dir_website}/data"
def dir_root = ('..' as File).getCanonicalFile()
def dir_website = dir_root.resolve('website')
def dir_data = dir_website.resolve('data')
new File(dir_data).mkdirs()
dir_data.mkdirs()
// sort and check shared regex collections
def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw.githubusercontent.com/filebot/data/master')
@ -23,7 +23,7 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw
'series-mappings.txt'
].each{
def input = new URL(dir_data_master + '/' + it)
def output = new File("${dir_data}/${it}")
def output = dir_data.resolve(it)
log.fine "Fetch $input"
def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER)
@ -40,7 +40,8 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw
def reviews = []
new File("${dir_root}/reviews.tsv").eachLine('UTF-8'){
dir_root.resolve('reviews.tsv').eachLine('UTF-8'){
def s = it.split(/\t/, 3)*.trim()
reviews << [user: s[0], date: s[1], text: s[2]]
}
@ -48,17 +49,17 @@ reviews = reviews.sort{ it.date }
def json = new groovy.json.JsonBuilder()
json.call(reviews as List)
json.toPrettyString().saveAs("${dir_website}/reviews.json")
json.toPrettyString().saveAs(dir_website.resolve('reviews.json'))
log.info "Reviews: " + reviews.size()
/* ------------------------------------------------------------------------- */
def moviedb_out = new File("${dir_data}/moviedb.txt")
def thetvdb_out = new File("${dir_data}/thetvdb.txt")
def anidb_out = new File("${dir_data}/anidb.txt")
def osdb_out = new File("${dir_data}/osdb.txt")
def moviedb_out = dir_data.resolve('moviedb.txt')
def thetvdb_out = dir_data.resolve('thetvdb.txt')
def anidb_out = dir_data.resolve('anidb.txt')
def osdb_out = dir_data.resolve('osdb.txt')
def pack(file, lines) {
@ -157,7 +158,7 @@ def omdb = new TreeMap()
}
def tmdb_txt = new File('tmdb.txt')
def tmdb_txt = 'tmdb.txt' as File
def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1])
def tmdb = []
@ -207,19 +208,19 @@ pack(moviedb_out, movies*.join('\t'))
// BUILD tvdb index
def tvdb_txt = new File('tvdb.txt')
def tvdb_txt = 'tvdb.txt' as File
def tvdb = [:]
if (tvdb_txt.exists()) {
tvdb_txt.eachLine('UTF-8'){
def line = it.split('\t').toList()
def names = line.subList(5, line.size())
tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float] + names)
def line = it.split('\t') as List
tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float, line[5] as Integer] + line[6..<line.size()])
}
}
def tvdb_updates = [:] as TreeMap
new File('updates_all.xml').eachLine('UTF-8'){
('updates_all.xml' as File).eachLine('UTF-8'){
def m = (it =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>')
while(m.find()) {
def id = m.group(1) as Integer
@ -239,18 +240,18 @@ tvdb_updates.values().each{ update ->
retry(2, 60000) {
def seriesNames = []
def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml")
def imdbid = xml.Series.IMDB_ID.text()
def imdbid = any{ xml.Series.IMDB_ID.text().match(/tt\d+/) }{ '' }
seriesNames += xml.Series.SeriesName.text()
def rating = tryQuietly{ xml.Series.Rating.text().toFloat() }
def votes = tryQuietly{ xml.Series.RatingCount.text().toFloat() }
def rating = any{ xml.Series.Rating.text().toFloat() }{ 0 }
def votes = any{ xml.Series.RatingCount.text().toFloat() }{ 0 }
def year = any{ xml.Series.FirstAired.text().match(/\d{4}/) as Integer }{ 0 }
// only retrieve additional data for reasonably popular shows
if (votes >= 5 && rating >= 4) {
if (imdbid && votes >= 5 && rating >= 4) {
tryLogCatch{
if (imdbid =~ /tt(\d+)/) {
seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName()
}
seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName()
}
// scrape extra alias titles from webpage (not supported yet by API)
@ -274,14 +275,14 @@ tvdb_updates.values().each{ update ->
seriesNames += intlseries
}
def data = [update.time, update.id, imdbid, rating ?: 0, votes ?: 0] + seriesNames.findAll{ it != null && it.length() > 0 }
def data = [update.time, update.id, imdbid, rating, votes, year] + seriesNames.findAll{ it != null && it.length() > 0 }
tvdb.put(update.id, data)
log.info "Update $update => $data"
}
}
catch(Throwable e) {
printException(e)
def data = [update.time, update.id, '', 0, 0]
def data = [update.time, update.id, '', 0, 0, 0]
tvdb.put(update.id, data)
log.info "[BAD] Update $update => $data"
}
@ -291,21 +292,27 @@ tvdb_updates.values().each{ update ->
// remove entries that have become invalid
tvdb.keySet().toList().each{ id ->
if (tvdb_updates[id] == null) {
log.info "Invalid ID found: ${tvdb[id]}"
log.finest "Invalid ID found: ${tvdb[id]}"
tvdb.remove(id)
}
}
tvdb.values().findResults{ it.collect{ it.toString().replace('\t', '').trim() }.join('\t') }.join('\n').saveAs(tvdb_txt)
// additional custom mappings
def extraAliasNames = csv("${dir_data}/add-series-alias.txt")
def extraAliasNames = csv(dir_data.resolve('add-series-alias.txt'), '\t', 0, [1..-1])
def thetvdb_index = []
tvdb.values().each{ r ->
def tvdb_id = r[1]
def rating = r[3]
def votes = r[4]
def names = r.subList(5, r.size())
def year = r[5]
def names = r[6..<r.size()]
// ignore invalid entries
if (names.isEmpty()) {
return
}
def alias = extraAliasNames[names[0]]
if (alias) {
@ -313,7 +320,12 @@ tvdb.values().each{ r ->
names += alias
}
if (alias !=null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) {
if (year > 0 && !names[0].endsWith(" ($year)")) {
names.add(1, names[0] + " ($year)")
}
// always include if alias has been manually added
if (alias != null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) {
getNamePermutations(names).each{ n ->
thetvdb_index << [tvdb_id, n]
}
@ -337,7 +349,7 @@ pack(thetvdb_out, thetvdb_txt)
// BUILD osdb index
def osdb = []
new File('osdb.txt').eachLine('UTF-8'){
('osdb.txt' as File).eachLine('UTF-8'){
def fields = it.split(/\t/)*.trim()
// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
@ -389,7 +401,7 @@ def anidb = new AnidbClient('filebot', 6).getAnimeTitles() as List
def animeExcludes = [] as Set
// exclude anime movies from anime index
new File('anime-list.xml').eachLine('UTF-8') {
('anime-list.xml' as File).eachLine('UTF-8') {
if (it =~ /tvdbid="movie"/ || it =~ /imdbid="ttd\+"/) {
animeExcludes << it.match(/anidbid="(\d+)"/).toInteger()
}

View File

@ -80,14 +80,14 @@ public class ExpressionFormatFunctions {
}
public static List<String> readLines(String path) throws IOException {
return FileUtilities.readLines(getFile(path));
return FileUtilities.readLines(getUserFile(path));
}
public static Object readXml(String path) throws Exception {
return new XmlSlurper().parse(getFile(path));
return new XmlSlurper().parse(getUserFile(path));
}
public static File getFile(String path) {
public static File getUserFile(String path) {
File f = new File(path);
if (!f.isAbsolute()) {