2014-01-08 08:36:32 +00:00
|
|
|
|
import org.tukaani.xz.*
|
2013-11-20 10:07:25 +00:00
|
|
|
|
|
|
|
|
|
/* ------------------------------------------------------------------------- */
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Validate, de-duplicate and sort a regex list file in place.
 *
 * Each line of the UTF-8 file at `path` is trimmed and compiled with
 * java.util.regex.Pattern, so an invalid expression fails the run before
 * anything is written back. The pattern strings are collected in a
 * case-insensitive sorted set (entries that differ only by case collapse
 * into one), joined with newlines and saved to the same `path`.
 *
 * @param path location of the regex list that is read and then overwritten
 */
def sortRegexList(path) {
	def patterns = new TreeSet(String.CASE_INSENSITIVE_ORDER)
	new File(path).eachLine('UTF-8'){ line ->
		// check if regex compiles; add in place, because `+=` on a set
		// builds a complete copy of the set for every single line
		patterns.add(java.util.regex.Pattern.compile(line.trim()).pattern())
	}

	def out = patterns.join('\n').saveAs(path)
	// print the saveAs result followed by the text that was written
	println "${out}\n${out.text}\n"
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// validate and sort each shared regex collection, one file after the other
for (regexList in [
	"website/data/release-groups.txt",
	"website/data/query-blacklist.txt",
	"website/data/exclude-blacklist.txt",
	"website/data/series-mappings.txt"
]) {
	sortRegexList(regexList)
}
|
|
|
|
|
|
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
/* ------------------------------------------------------------------------- */
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// read reviews.csv (three ';'-separated fields per line: user, date, text)
def reviews = new File('reviews.csv').readLines('UTF-8').collect{ row ->
	def fields = row.split(';', 3)
	def user = fields[0]
	def date = fields[1]
	// strip the enclosing quotes of the text field, then unescape doubled quotes
	def text = fields[2].replaceAll(/^\"|\"$/, '').replaceAll(/["]{2}/, '"')
	[user: user, date: date, text: text]
}
reviews = reviews.sort{ it.date }

// export the date-ordered reviews as pretty-printed JSON
def json = new groovy.json.JsonBuilder(reviews as List)
json.toPrettyString().saveAs('website/reviews.json')
println "Reviews: " + reviews.size()
|
|
|
|
|
|
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
/* ------------------------------------------------------------------------- */
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
|
|
|
|
|
2013-11-21 16:31:09 +00:00
|
|
|
|
// base files for the three indexes; pack() writes "<name>.xz" next to each of them
def (moviedb_out, thetvdb_out, anidb_out) = [
	"website/data/moviedb.txt",
	"website/data/thetvdb.txt",
	"website/data/anidb.txt"
].collect{ new File(it) }
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
2013-08-10 07:56:11 +00:00
|
|
|
|
/**
 * Write `lines` as XZ-compressed (LZMA2, default preset) UTF-8 text, one
 * newline-terminated entry per line, into a sibling of `file` that carries
 * the same name plus an `.xz` suffix. Afterwards print the number of rows
 * and the largest tab-separated column count.
 *
 * @param file  base file; only its parent folder and name are used
 * @param lines text rows to write
 */
def pack(file, lines) {
	def target = new File(file.parentFile, file.name + '.xz')
	target.withOutputStream{ raw ->
		def xz = new XZOutputStream(raw, new LZMA2Options(LZMA2Options.PRESET_DEFAULT))
		xz.withWriter('UTF-8'){ writer ->
			for (line in lines) {
				writer.append(line).append('\n')
			}
		}
	}

	// statistics for the build log
	def columnCounts = lines.collect{ it.split(/\t/).length }
	println "$file (${lines.size()} rows, ${columnCounts.max()} columns)"
}
|
|
|
|
|
|
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
|
|
|
|
|
|
|
|
|
2013-11-21 14:31:31 +00:00
|
|
|
|
/**
 * Decide whether `s` is acceptable as a movie name: either its
 * punctuation-normalized form has at least 4 characters, or it starts with
 * an upper-case ASCII letter or digit and contains a run of 3 alphanumerics.
 */
def isValidMovieName(s) {
	if (s.normalizePunctuation().length() >= 4) {
		return true
	}
	def startsWithUpperOrDigit = s =~ /^[A-Z0-9]/
	def hasAlnumRun = s =~ /[\p{Alnum}]{3}/
	return startsWithUpperOrDigit && hasAlnumRun
}
|
|
|
|
|
|
2014-01-06 23:22:31 +00:00
|
|
|
|
/**
 * Expand a list of names into the names worth indexing.
 *
 * Every distinct trimmed name contributes itself plus a simplified variant.
 * The combined list is de-duplicated by lower-cased, punctuation-normalized
 * form and then filtered by the rules documented inline.
 *
 * @param names original names
 * @return list of accepted names, originals ahead of their simplified variants
 */
def getNamePermutations(names) {
	def normalize = { s -> s.toLowerCase().normalizePunctuation() }.memoize()

	// simplification steps, applied in this order (result is trimmed after each step)
	def simplifiers = [
		{ s -> s.replaceAll(/(?i)(^(The|A)\s)|([,]\s(The|A)$)/, '') },	// leading "The "/"A " or trailing ", The"/", A"
		{ s -> s.replaceAll(/\s&\s/, ' and ') },	// " & " => " and "
		{ s -> s.replaceAll(/\([^\)]*\)$/, '') }	// parenthesized suffix at the very end
	]

	def candidates = []
	for (original in names*.trim().unique()) {
		def simplified = simplifiers.inject(original){ s, fn -> fn(s).trim() }
		candidates << original << simplified
	}
	candidates = candidates.unique{ normalize(it) }

	return candidates.findAll{ name ->
		// too short (this also covers the empty string)
		if (name.length() < 2) return false
		// exactly three digits of the form 1, 0-9, 1-9
		if (name ==~ /[1][0-9][1-9]/) return false
		// must not start with a lower-case ASCII letter
		if (name =~ /^[a-z]/) return false
		// must start with '@', '.', a Unicode letter or a digit
		if (!(name =~ /^[@.\p{L}\p{Digit}]/)) return false
		// ignore names that overlap with media folder names
		return !MediaDetection.releaseInfo.structureRootPattern.matcher(name).matches()
	}
	// disabled in the original: out = out.findAll{ a -> names.take(1).contains(a) || out.findAll{ b -> normalize(a).startsWith(normalize(b) + ' ') }.size() == 0 } // TRY TO EXCLUDE REDUNDANT SUBSTRING DUPLICATES
}
|
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
/**
 * Order `list` by the String key that `keyFunction` yields for each item,
 * ignoring case. Items whose keys are equal ignoring case collapse into a
 * single entry; the item seen last wins.
 *
 * @return the value collection of the underlying TreeMap, in key order
 */
def treeSort(list, keyFunction) {
	def byKey = new TreeMap(String.CASE_INSENSITIVE_ORDER)
	for (item in list) {
		byKey[keyFunction(item)] = item
	}
	return byKey.values()
}
|
|
|
|
|
|
2014-04-18 19:41:39 +00:00
|
|
|
|
/**
 * Read a delimited UTF-8 text file into a map.
 *
 * @param f          file to read; anything that is not a regular file yields an empty map
 * @param delim      delimiter handed to splitEachLine
 * @param keyIndex   column index of the map key
 * @param valueIndex column index (or index selection) of the map value; the lookup is wrapped in tryQuietly
 * @return map from key column to value column, later lines replacing earlier ones with the same key
 */
def csv(f, delim, keyIndex, valueIndex) {
	def values = [:]
	if (!f.isFile()) {
		return values
	}

	f.splitEachLine(delim, 'UTF-8'){ columns ->
		def key = columns[keyIndex]
		def value = tryQuietly{ columns[valueIndex] }
		values[key] = value
	}
	return values
}
|
|
|
|
|
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
2014-03-09 12:50:03 +00:00
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// BUILD moviedb index
def omdb = []
new File('omdb.txt').eachLine('Windows-1252'){ row ->
	def cols = row.split(/\t/)

	// skip rows with too few columns, a non-numeric first column or a year that is not 4 digits
	if (cols.length <= 11 || !(cols[0] ==~ /\d+/) || !(cols[3] ==~ /\d{4}/)) {
		return
	}

	def imdbid = cols[1].substring(2).toInteger()	// numeric part after the 2-character prefix
	def name = cols[2].replaceAll(/\s+/, ' ').trim()
	def year = cols[3].toInteger()
	def runtime = cols[5]
	// rating and votes fall back to 0 when the column is missing or unparsable
	def rating = tryQuietly{ cols[12].toFloat() } ?: 0
	def votes = tryQuietly{ cols[13].replaceAll(/\D/, '').toInteger() } ?: 0

	// 1970+: needs an hours / 3-digit-minutes runtime or 200+ votes, plus rating >= 1 and 50+ votes
	def recentAndRated = year >= 1970 && (runtime =~ /(\d.h)|(\d{3}.min)/ || votes >= 200) && rating >= 1 && votes >= 50
	// 1950+: accepted on 5000+ votes alone
	def olderButPopular = year >= 1950 && votes >= 5000

	if (recentAndRated || olderButPopular) {
		omdb << [imdbid.pad(7), name, year]
	}
}

// keep only ids up to 7 digits with an acceptable movie name
omdb = omdb.findAll{ entry -> (entry[0] as int) <= 9999999 && isValidMovieName(entry[1]) }
|
|
|
|
|
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
// previously fetched rows, keyed by column 1 (the padded imdb id); each value is the complete row
def tmdb_txt = new File('tmdb.txt')
def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1])

def tmdb = []
for (m in omdb) {
	def sync = System.currentTimeMillis()
	def imdbKey = m[0]

	// cached rows are reused for 360 days (movies before 2000) or 120 days (everything else)
	def maxAge = (m[2].toInteger() < 2000 ? 360 : 120) * 24 * 60 * 60 * 1000L
	if (tmdb_index.containsKey(imdbKey) && (sync - tmdb_index[imdbKey][0].toLong()) < maxAge) {
		tmdb << tmdb_index[imdbKey]
		continue
	}

	try {
		def info = WebServices.TheMovieDB.getMovieInfo("tt${m[0]}", Locale.ENGLISH, true)
		def names = [info.name, info.originalName] + info.alternativeTitles

		// one row per distinct year: the release year from the lookup and the year from omdb
		def years = [info?.released?.year, m[2]].findResults{ it?.toInteger() }.unique()
		for (y in years) {
			def row = [sync, m[0].pad(7), info.id.pad(7), y.pad(4)] + names
			println row
			tmdb << row
		}
	} catch(FileNotFoundException e) {
		// lookup failed: remember the movie with tmdb id 0 and the omdb year and name
		def row = [sync, m[0].pad(7), 0, m[2], m[1]]
		println row
		tmdb << row
	}
}

// persist all rows (reused and new) as tab-separated lines
def tmdbLines = tmdb*.join('\t')
tmdbLines.join('\n').saveAs(tmdb_txt)
|
2013-11-20 10:07:25 +00:00
|
|
|
|
|
|
|
|
|
movies = tmdb.findResults{ row ->
	def ids = row[1..3] // imdb id, tmdb id, year
	def names = getNamePermutations(row[4..-1]).findAll{ isValidMovieName(it) }

	// rows need positive imdb and tmdb ids and at least one usable name
	def usable = ids[0].toInteger() > 0 && ids[1].toInteger() > 0 && names.size() > 0
	usable ? ids + names : null
}

// order by "<first name> <year>"
movies = treeSort(movies, { entry -> entry[3, 2].join(' ') })

// sanity check
if (movies.size() < 40000) {
	die('Movie index sanity failed:' + movies.size())
}
pack(moviedb_out, movies.collect{ it.join('\t') })
|
2013-11-20 10:07:25 +00:00
|
|
|
|
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
/* ------------------------------------------------------------------------- */
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
|
|
|
|
|
// BUILD tvdb index
def tvdb_txt = new File('tvdb.txt')
def tvdb = [:]

// restore cached rows: time, series id, imdb id, rating, votes, followed by the names
if (tvdb_txt.exists()) {
	tvdb_txt.eachLine('UTF-8'){ row ->
		def cols = row.split('\t').toList()
		def names = cols.subList(5, cols.size())
		def id = cols[1] as Integer
		tvdb[id] = [cols[0] as Long, id, cols[2], cols[3] as Float, cols[4] as Float] + names
	}
}
|
|
|
|
|
|
2014-08-16 03:07:51 +00:00
|
|
|
|
// collect every <Series> id/time pair found in updates_all.xml, keyed and ordered by series id
def tvdb_updates = new TreeMap()
new File('updates_all.xml').eachLine('UTF-8'){ xmlLine ->
	def series = (xmlLine =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>')
	while (series.find()) {
		def id = series.group(1) as Integer
		def time = series.group(2) as Integer
		tvdb_updates[id] = [id: id, time: time]
	}
}
|
|
|
|
|
|
|
|
|
|
|
2014-08-16 02:40:39 +00:00
|
|
|
|
// refresh every series that is missing from the cache or has a newer update time
tvdb_updates.values().each{ update ->
	def cached = tvdb[update.id]
	def stale = cached == null || update.time > cached[0]
	if (!stale) {
		return
	}

	try {
		retry(2, 500) {
			def seriesNames = []
			def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml")
			def imdbid = xml.Series.IMDB_ID.text()
			seriesNames += xml.Series.SeriesName.text()

			// both values stay null when the text cannot be parsed as a number
			def rating = tryQuietly{ xml.Series.Rating.text().toFloat() }
			def votes = tryQuietly{ xml.Series.RatingCount.text().toFloat() }

			// only retrieve additional data for reasonably popular shows
			if (votes >= 5 && rating >= 4) {
				// add the name from the OMDb lookup when the series carries an imdb id
				tryLogCatch{
					if (imdbid =~ /tt(\d+)/) {
						seriesNames += OMDb.getMovieDescriptor(new Movie(null, 0, imdbid.match(/tt(\d+)/) as int, -1), Locale.ENGLISH).getName()
					}
				}

				// scrape extra alias titles from webpage (not supported yet by API)
				def page = org.jsoup.Jsoup.connect("http://thetvdb.com/?tab=series&id=${update.id}").get()
				def englishRows = page.select('#akaseries table tr table tr').findAll{ tr ->
					tr.select('td').any{ td -> td.text() ==~ /en/ }
				}
				seriesNames += englishRows.findResults{ tr -> tr.select('td').first().text() }
			}

			def usableNames = seriesNames.findAll{ it != null && it.length() > 0 }
			def data = [update.time, update.id, imdbid, rating ?: 0, votes ?: 0] + usableNames
			tvdb.put(update.id, data)
			println "Update $update => $data"
		}
	}
	catch(Throwable e) {
		// on failure, store a row without imdb id, rating, votes or names
		printException(e, false)
		def data = [update.time, update.id, '', 0, 0]
		tvdb.put(update.id, data)
		println "Update $update => $data"
	}
}
|
2013-12-02 18:25:06 +00:00
|
|
|
|
|
|
|
|
|
// remove entries that have become invalid (cached ids without an entry in tvdb_updates)
def staleIds = tvdb.keySet().toList().findAll{ id -> tvdb_updates[id] == null }
for (id in staleIds) {
	println "Invalid ID found: ${tvdb[id]}"
	tvdb.remove(id)
}

// write the cache back: tabs are removed from every field, fields are trimmed and tab-joined
def tvdbLines = tvdb.values().findResults{ row ->
	row.collect{ field -> field.toString().replace('\t', '').trim() }.join('\t')
}
tvdbLines.join('\n').saveAs(tvdb_txt)
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// [series id, name] pairs for every sufficiently rated series
def thetvdb_index = []
for (r in tvdb.values()) {
	def tvdb_id = r[1]
	def rating = r[3]
	def votes = r[4]
	def names = r.subList(5, r.size())

	// accepted [minimum votes, minimum rating] combinations
	def accepted = [[5, 4], [2, 7], [1, 10]].any{ limit -> votes >= limit[0] && rating >= limit[1] }
	if (accepted) {
		thetvdb_index.addAll(getNamePermutations(names).collect{ n -> [tvdb_id, n] })
	}
}
|
|
|
|
|
|
2013-12-14 10:49:16 +00:00
|
|
|
|
// add `to` as an extra name for the series whose index entry is named `from`;
// calls die(...) when no entry named `from` exists that lacks `to`
def addSeriesAlias = { from, to ->
	def match = thetvdb_index.find{ entry -> from == entry[1] && !entry.contains(to) }
	if (match == null) {
		die("Unabled to find series '${from}': '${to}'")
	}
	thetvdb_index << [match[0], to]
}
|
|
|
|
|
|
|
|
|
|
// additional custom mappings: [existing series name, alias to add], registered in this order
for (alias in [
	['Law & Order: Special Victims Unit', 'Law and Order SVU'],
	['Law & Order: Special Victims Unit', 'Law & Order SVU'],
	['CSI: Crime Scene Investigation', 'CSI'],
	['M*A*S*H', 'MASH'],
	['M*A*S*H', 'M.A.S.H.'],
	['NCIS: Los Angeles', 'NCIS LA'],
	['NCIS: Los Angeles', 'NCIS LosAngeles'],
	['How I Met Your Mother', 'HIMYM'],
	['Battlestar Galactica (2003)', 'BSG'],
	['World Series of Poker', 'WSOP'],
	['House of Cards', 'HOC'],
	['The Big Bang Theory', 'TBBT'],
	['The Walking Dead', 'TWD'],
	['@midnight', 'At Midnight'],
	['The Late Late Show with Craig Ferguson', 'Craig Ferguson'],
	['Naruto Shippuden', 'Naruto Shippuuden'],
	['Resurrection', 'Resurrection (US)'],
	['Revolution', 'Revolution (2012)'],
	['Cosmos: A Spacetime Odyssey', 'Cosmos A Space Time Odyssey'],
	['The Bridge (2013)', 'The Bridge (US)']
]) {
	addSeriesAlias(alias[0], alias[1])
}
|
2014-03-31 12:22:35 +00:00
|
|
|
|
|
2013-12-14 10:49:16 +00:00
|
|
|
|
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
2013-09-16 04:18:11 +00:00
|
|
|
|
// coerce ids to Integer and collapse whitespace runs inside names
thetvdb_index = thetvdb_index.findResults{ entry ->
	[entry[0] as Integer, entry[1].replaceAll(/\s+/, ' ').trim()]
}

// drop names that mention "duplicate", contain 6+ consecutive digits,
// start or end with '*', or are shorter than 2 characters
thetvdb_index = thetvdb_index.findAll{ entry ->
	def name = entry[1]
	if (name =~ /(?i:duplicate)/) return false
	if (name =~ /\d{6,}/) return false
	if (name.startsWith('*')) return false
	if (name.endsWith('*')) return false
	return name.length() >= 2
}
thetvdb_index = thetvdb_index.sort{ left, right -> left[0] <=> right[0] }

// one line per series id: padded id plus names (unique ignoring case), cut off after 4 columns
def thetvdb_txt = thetvdb_index.groupBy{ it[0] }.findResults{ id, entries ->
	def names = entries*.getAt(1).unique{ it.toLowerCase() }
	([id.pad(6)] + names).take(4).join('\t')
}

// sanity check
if (thetvdb_txt.size() < 4000) {
	die('TheTVDB index sanity failed: ' + thetvdb_txt.size())
}
pack(thetvdb_out, thetvdb_txt)
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
|
|
|
|
2013-08-10 05:23:14 +00:00
|
|
|
|
|
2013-11-20 10:07:25 +00:00
|
|
|
|
// BUILD anidb index
def anidb = new AnidbClient('filebot', 5).getAnimeTitles()

def anidb_index = anidb.findResults{ anime ->
	// collapse whitespace, trim, and replace runs of quote-like characters with a plain apostrophe
	def names = anime.effectiveNames*.replaceAll(/\s+/, ' ')
	names = names*.trim()
	names = names*.replaceAll(/['`´‘’ʻ]+/, /'/)

	// keep only permutations that are not empty after stripReleaseInfo
	names = getNamePermutations(names).findAll{ stripReleaseInfo(it)?.length() > 0 }
	if (names.empty) {
		return null
	}
	// padded anime id followed by at most 4 names
	return [anime.getAnimeId().pad(5)] + names.take(4)
}

// join and sort
def anidb_txt = anidb_index.collect{ row -> row.join('\t') }
anidb_txt = anidb_txt.sort().unique()

// sanity check
if (anidb_txt.size() < 8000) {
	die('AniDB index sanity failed:' + anidb_txt.size())
}
pack(anidb_out, anidb_txt)
|