Refactor BuildData.groovy
This commit is contained in:
parent
592e3ae608
commit
a424b55b8e
|
@ -7,11 +7,11 @@ import org.tukaani.xz.*
|
|||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
def dir_root = ".."
|
||||
def dir_website = "${dir_root}/website"
|
||||
def dir_data = "${dir_website}/data"
|
||||
def dir_root = ('..' as File).getCanonicalFile()
|
||||
def dir_website = dir_root.resolve('website')
|
||||
def dir_data = dir_website.resolve('data')
|
||||
|
||||
new File(dir_data).mkdirs()
|
||||
dir_data.mkdirs()
|
||||
|
||||
// sort and check shared regex collections
|
||||
def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw.githubusercontent.com/filebot/data/master')
|
||||
|
@ -23,7 +23,7 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw
|
|||
'series-mappings.txt'
|
||||
].each{
|
||||
def input = new URL(dir_data_master + '/' + it)
|
||||
def output = new File("${dir_data}/${it}")
|
||||
def output = dir_data.resolve(it)
|
||||
|
||||
log.fine "Fetch $input"
|
||||
def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER)
|
||||
|
@ -40,7 +40,8 @@ def dir_data_master = System.getProperty('net.filebot.data.master', 'https://raw
|
|||
|
||||
|
||||
def reviews = []
|
||||
new File("${dir_root}/reviews.tsv").eachLine('UTF-8'){
|
||||
|
||||
dir_root.resolve('reviews.tsv').eachLine('UTF-8'){
|
||||
def s = it.split(/\t/, 3)*.trim()
|
||||
reviews << [user: s[0], date: s[1], text: s[2]]
|
||||
}
|
||||
|
@ -48,17 +49,17 @@ reviews = reviews.sort{ it.date }
|
|||
|
||||
def json = new groovy.json.JsonBuilder()
|
||||
json.call(reviews as List)
|
||||
json.toPrettyString().saveAs("${dir_website}/reviews.json")
|
||||
json.toPrettyString().saveAs(dir_website.resolve('reviews.json'))
|
||||
log.info "Reviews: " + reviews.size()
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
def moviedb_out = new File("${dir_data}/moviedb.txt")
|
||||
def thetvdb_out = new File("${dir_data}/thetvdb.txt")
|
||||
def anidb_out = new File("${dir_data}/anidb.txt")
|
||||
def osdb_out = new File("${dir_data}/osdb.txt")
|
||||
def moviedb_out = dir_data.resolve('moviedb.txt')
|
||||
def thetvdb_out = dir_data.resolve('thetvdb.txt')
|
||||
def anidb_out = dir_data.resolve('anidb.txt')
|
||||
def osdb_out = dir_data.resolve('osdb.txt')
|
||||
|
||||
|
||||
def pack(file, lines) {
|
||||
|
@ -157,7 +158,7 @@ def omdb = new TreeMap()
|
|||
}
|
||||
|
||||
|
||||
def tmdb_txt = new File('tmdb.txt')
|
||||
def tmdb_txt = 'tmdb.txt' as File
|
||||
def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1])
|
||||
|
||||
def tmdb = []
|
||||
|
@ -207,19 +208,19 @@ pack(moviedb_out, movies*.join('\t'))
|
|||
|
||||
|
||||
// BUILD tvdb index
|
||||
def tvdb_txt = new File('tvdb.txt')
|
||||
def tvdb_txt = 'tvdb.txt' as File
|
||||
def tvdb = [:]
|
||||
|
||||
if (tvdb_txt.exists()) {
|
||||
tvdb_txt.eachLine('UTF-8'){
|
||||
def line = it.split('\t').toList()
|
||||
def names = line.subList(5, line.size())
|
||||
tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float] + names)
|
||||
def line = it.split('\t') as List
|
||||
|
||||
tvdb.put(line[1] as Integer, [line[0] as Long, line[1] as Integer, line[2], line[3] as Float, line[4] as Float, line[5] as Integer] + line[6..<line.size()])
|
||||
}
|
||||
}
|
||||
|
||||
def tvdb_updates = [:] as TreeMap
|
||||
new File('updates_all.xml').eachLine('UTF-8'){
|
||||
('updates_all.xml' as File).eachLine('UTF-8'){
|
||||
def m = (it =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>')
|
||||
while(m.find()) {
|
||||
def id = m.group(1) as Integer
|
||||
|
@ -239,18 +240,18 @@ tvdb_updates.values().each{ update ->
|
|||
retry(2, 60000) {
|
||||
def seriesNames = []
|
||||
def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml")
|
||||
def imdbid = xml.Series.IMDB_ID.text()
|
||||
def imdbid = any{ xml.Series.IMDB_ID.text().match(/tt\d+/) }{ '' }
|
||||
|
||||
seriesNames += xml.Series.SeriesName.text()
|
||||
|
||||
def rating = tryQuietly{ xml.Series.Rating.text().toFloat() }
|
||||
def votes = tryQuietly{ xml.Series.RatingCount.text().toFloat() }
|
||||
def rating = any{ xml.Series.Rating.text().toFloat() }{ 0 }
|
||||
def votes = any{ xml.Series.RatingCount.text().toFloat() }{ 0 }
|
||||
def year = any{ xml.Series.FirstAired.text().match(/\d{4}/) as Integer }{ 0 }
|
||||
|
||||
// only retrieve additional data for reasonably popular shows
|
||||
if (votes >= 5 && rating >= 4) {
|
||||
if (imdbid && votes >= 5 && rating >= 4) {
|
||||
tryLogCatch{
|
||||
if (imdbid =~ /tt(\d+)/) {
|
||||
seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName()
|
||||
}
|
||||
seriesNames += OMDb.getMovieDescriptor(new Movie(imdbid.match(/tt(\d+)/) as int), Locale.ENGLISH).getName()
|
||||
}
|
||||
|
||||
// scrape extra alias titles from webpage (not supported yet by API)
|
||||
|
@ -274,14 +275,14 @@ tvdb_updates.values().each{ update ->
|
|||
seriesNames += intlseries
|
||||
}
|
||||
|
||||
def data = [update.time, update.id, imdbid, rating ?: 0, votes ?: 0] + seriesNames.findAll{ it != null && it.length() > 0 }
|
||||
def data = [update.time, update.id, imdbid, rating, votes, year] + seriesNames.findAll{ it != null && it.length() > 0 }
|
||||
tvdb.put(update.id, data)
|
||||
log.info "Update $update => $data"
|
||||
}
|
||||
}
|
||||
catch(Throwable e) {
|
||||
printException(e)
|
||||
def data = [update.time, update.id, '', 0, 0]
|
||||
def data = [update.time, update.id, '', 0, 0, 0]
|
||||
tvdb.put(update.id, data)
|
||||
log.info "[BAD] Update $update => $data"
|
||||
}
|
||||
|
@ -291,21 +292,27 @@ tvdb_updates.values().each{ update ->
|
|||
// remove entries that have become invalid
|
||||
tvdb.keySet().toList().each{ id ->
|
||||
if (tvdb_updates[id] == null) {
|
||||
log.info "Invalid ID found: ${tvdb[id]}"
|
||||
log.finest "Invalid ID found: ${tvdb[id]}"
|
||||
tvdb.remove(id)
|
||||
}
|
||||
}
|
||||
tvdb.values().findResults{ it.collect{ it.toString().replace('\t', '').trim() }.join('\t') }.join('\n').saveAs(tvdb_txt)
|
||||
|
||||
// additional custom mappings
|
||||
def extraAliasNames = csv("${dir_data}/add-series-alias.txt")
|
||||
def extraAliasNames = csv(dir_data.resolve('add-series-alias.txt'), '\t', 0, [1..-1])
|
||||
|
||||
def thetvdb_index = []
|
||||
tvdb.values().each{ r ->
|
||||
def tvdb_id = r[1]
|
||||
def rating = r[3]
|
||||
def votes = r[4]
|
||||
def names = r.subList(5, r.size())
|
||||
def year = r[5]
|
||||
def names = r[6..<r.size()]
|
||||
|
||||
// ignore invalid entries
|
||||
if (names.isEmpty()) {
|
||||
return
|
||||
}
|
||||
|
||||
def alias = extraAliasNames[names[0]]
|
||||
if (alias) {
|
||||
|
@ -313,7 +320,12 @@ tvdb.values().each{ r ->
|
|||
names += alias
|
||||
}
|
||||
|
||||
if (alias !=null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) {
|
||||
if (year > 0 && !names[0].endsWith(" ($year)")) {
|
||||
names.add(1, names[0] + " ($year)")
|
||||
}
|
||||
|
||||
// always include if alias has been manually added
|
||||
if (alias != null || (votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) {
|
||||
getNamePermutations(names).each{ n ->
|
||||
thetvdb_index << [tvdb_id, n]
|
||||
}
|
||||
|
@ -337,7 +349,7 @@ pack(thetvdb_out, thetvdb_txt)
|
|||
// BUILD osdb index
|
||||
def osdb = []
|
||||
|
||||
new File('osdb.txt').eachLine('UTF-8'){
|
||||
('osdb.txt' as File).eachLine('UTF-8'){
|
||||
def fields = it.split(/\t/)*.trim()
|
||||
|
||||
// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
|
||||
|
@ -389,7 +401,7 @@ def anidb = new AnidbClient('filebot', 6).getAnimeTitles() as List
|
|||
def animeExcludes = [] as Set
|
||||
|
||||
// exclude anime movies from anime index
|
||||
new File('anime-list.xml').eachLine('UTF-8') {
|
||||
('anime-list.xml' as File).eachLine('UTF-8') {
|
||||
if (it =~ /tvdbid="movie"/ || it =~ /imdbid="ttd\+"/) {
|
||||
animeExcludes << it.match(/anidbid="(\d+)"/).toInteger()
|
||||
}
|
||||
|
|
|
@ -80,14 +80,14 @@ public class ExpressionFormatFunctions {
|
|||
}
|
||||
|
||||
public static List<String> readLines(String path) throws IOException {
|
||||
return FileUtilities.readLines(getFile(path));
|
||||
return FileUtilities.readLines(getUserFile(path));
|
||||
}
|
||||
|
||||
public static Object readXml(String path) throws Exception {
|
||||
return new XmlSlurper().parse(getFile(path));
|
||||
return new XmlSlurper().parse(getUserFile(path));
|
||||
}
|
||||
|
||||
public static File getFile(String path) {
|
||||
public static File getUserFile(String path) {
|
||||
File f = new File(path);
|
||||
|
||||
if (!f.isAbsolute()) {
|
||||
|
|
Loading…
Reference in New Issue