2015-05-11 10:35:45 +00:00
#!/usr/bin/env filebot -script
2016-02-24 19:33:04 +00:00
import java.util.regex.*
2014-01-08 08:36:32 +00:00
import org.tukaani.xz.*
2013-11-20 10:07:25 +00:00
2016-02-24 19:27:31 +00:00
2013-11-20 10:07:25 +00:00
/* ------------------------------------------------------------------------- */
2013-08-10 05:23:14 +00:00
2016-02-24 19:27:31 +00:00
2015-05-11 13:57:04 +00:00
// directory layout, relative to the working directory
def dir_root = ".."
def dir_website = "${dir_root}/website"
def dir_data = "${dir_website}/data"

// make sure the data output folder exists before anything is written to it
new File(dir_data).mkdirs()
2016-02-24 19:27:31 +00:00
// sort and check shared regex collections
[
	'add-series-alias.txt',
	'exclude-blacklist.txt',
	'query-blacklist.txt',
	'release-groups.txt',
	'series-mappings.txt'
].each{ name ->
	def input = new URL("https://raw.githubusercontent.com/filebot/data/master/${name}")
	def output = new File("${dir_data}/${name}")

	// case-insensitive sorted set: orders the lines and collapses entries that differ only by case
	def lines = new TreeSet(String.CASE_INSENSITIVE_ORDER)

	input.getText('UTF-8').split(/\R/)*.trim().findAll{ it.length() > 0 }.each{ line ->
		// compiling validates the line as a regular expression (a malformed pattern throws here);
		// add in place instead of `lines += ...`, which copies the whole set for every single line
		lines << Pattern.compile(line).pattern()
	}

	println input
	lines.each{ println it }

	pack(output, lines)
}
2013-08-10 05:23:14 +00:00
2013-11-20 10:07:25 +00:00
/* ------------------------------------------------------------------------- */
2013-08-10 05:23:14 +00:00
// convert reviews.tsv (user <TAB> date <TAB> text) into website/reviews.json
def reviews = []
new File("${dir_root}/reviews.tsv").eachLine('UTF-8'){ line ->
	// split into at most 3 columns so that the text column keeps any further tabs,
	// then trim each column and collapse doubled quotes ("") into a single quote
	def columns = line.split(/\t/, 3).collect{ it.trim().replaceAll('["]{2}', '"') }
	reviews << [user: columns[0], date: columns[1], text: columns[2]]
}

// order by the date column
reviews = reviews.sort{ it.date }

def json = new groovy.json.JsonBuilder()
json.call(reviews as List)
json.toPrettyString().saveAs("${dir_website}/reviews.json")

println "Reviews: " + reviews.size()
2013-11-20 10:07:25 +00:00
/* ------------------------------------------------------------------------- */
2013-08-10 05:23:14 +00:00
2015-05-11 10:35:45 +00:00
// output files for the generated indexes (pack additionally writes a .xz copy next to each)
def moviedb_out = new File("${dir_data}/moviedb.txt")
def thetvdb_out = new File("${dir_data}/thetvdb.txt")
def anidb_out   = new File("${dir_data}/anidb.txt")
def osdb_out    = new File("${dir_data}/osdb.txt")
2015-05-11 09:13:35 +00:00
2013-08-10 05:23:14 +00:00
2013-08-10 07:56:11 +00:00
/**
 * Write the given lines to the given file as UTF-8 text (one entry per line,
 * each terminated by '\n') and write an XZ-compressed copy to a sibling file
 * named <file name>.xz. Afterwards print the canonical file path together with
 * the number of rows and the largest number of tab-separated columns.
 *
 * @param file   plain-text output file
 * @param lines  collection of strings to write
 */
def pack(file, lines) {
	// shared serializer used for both the plain and the compressed output
	def writeLines = { writer ->
		lines.each{ line -> writer.append(line).append('\n') }
	}

	// plain text
	file.withOutputStream{ out ->
		out.withWriter('UTF-8', writeLines)
	}

	// xz compressed copy
	def xzFile = new File(file.parentFile, file.name + '.xz')
	xzFile.withOutputStream{ out ->
		def xz = new XZOutputStream(out, new LZMA2Options(LZMA2Options.PRESET_DEFAULT))
		xz.withWriter('UTF-8', writeLines)
	}

	// summary
	def rows = lines.size()
	def columns = lines.collect{ it.split(/\t/).length }.max()
	println "${file.canonicalFile} ($rows rows, $columns columns)"
}
2013-11-20 10:07:25 +00:00
/* ------------------------------------------------------------------------- */
2013-11-21 14:31:31 +00:00
/**
 * A name is accepted if it is at least 4 characters long after punctuation
 * normalization, or if it starts with an uppercase ASCII letter or digit and
 * contains a run of 3 alphanumeric characters.
 */
def isValidMovieName(s) {
	if (s.normalizePunctuation().length() >= 4) {
		return true
	}
	return (s =~ /^[A-Z0-9]/ && s =~ /[\p{Alnum}]{3}/)
}
2014-01-06 23:22:31 +00:00
/**
 * Expand a list of names into the list of names worth indexing: every
 * original name plus a simplified variant, de-duplicated ignoring case and
 * punctuation, and filtered by a few plausibility rules.
 *
 * @param names  list of name strings
 * @return list of accepted names (originals and simplified variants)
 */
def getNamePermutations(names) {
	// memoized key function for case/punctuation-insensitive de-duplication
	def normalize = { s -> s.toLowerCase().normalizePunctuation() }.memoize()

	// remove a leading "The "/"A " or a trailing ", The"/", A", but keep the name as is
	// if it starts with The/A and only a single word would be left
	// e.g. The Walking Dead => Walking Dead, The Voice => The Voice
	def dropArticle = { s ->
		def stripped = s.replaceAll(/(?i)(^(The|A)\s)|([,]\s(The|A)$)/, '')
		if (s =~ /^(?i:The|A)/ && stripped ==~ /\w+/) {
			return s
		}
		return stripped
	}
	// " & " => " and "
	def spellOutAmpersand = { s -> s.replaceAll(/\s&\s/, ' and ') }
	// remove a trailing (...) group
	def dropTrailingParens = { s -> s.replaceAll(/\([^\)]*\)$/, '') }

	def simplifiers = [dropArticle, spellOutAmpersand, dropTrailingParens]

	// each distinct trimmed name contributes itself and its fully simplified form
	def candidates = names*.trim().unique().collectMany{ original ->
		def simplified = simplifiers.inject(original) { value, fn -> fn(value).trim() }
		[original, simplified]
	}
	def out = candidates.unique{ normalize(it) }.findAll{ it.length() > 0 }

	// at least 2 characters, not a 3-digit value matching [1][0-9][1-9], not starting with a
	// lowercase ASCII letter, and starting with '@', '.', a Unicode letter or a digit
	out = out.findAll{ name ->
		name.length() >= 2 && !(name ==~ /[1][0-9][1-9]/) && !(name =~ /^[a-z]/) && name =~ /^[@.\p{L}\p{Digit}]/
	}

	// IGNORE NAMES THAT OVERLAP WITH MEDIA FOLDER NAMES
	def folderNamePattern = MediaDetection.releaseInfo.structureRootPattern
	out = out.findAll{ name -> !folderNamePattern.matcher(name).matches() }

	// out = out.findAll{ a -> names.take(1).contains(a) || out.findAll{ b -> normalize(a).startsWith(normalize(b) + ' ') }.size() == 0 } // TRY TO EXCLUDE REDUNDANT SUBSTRING DUPLICATES

	return out
}
2013-11-20 10:07:25 +00:00
/**
 * Sort the given items by a string key, ignoring case. Items whose keys are
 * equal ignoring case collapse into one entry; the item seen last wins.
 *
 * @param list         items to sort
 * @param keyFunction  closure mapping an item to its string sort key
 * @return the values of the sorted map, in key order
 */
def treeSort(list, keyFunction) {
	def byKey = new TreeMap(String.CASE_INSENSITIVE_ORDER)
	for (item in list) {
		byKey[keyFunction(item)] = item
	}
	return byKey.values()
}
2014-04-18 19:41:39 +00:00
/**
 * Read a delimited UTF-8 text file into a map. Returns an empty map if the
 * given file is not an existing regular file.
 *
 * @param f           input file
 * @param delim       delimiter passed to splitEachLine
 * @param keyIndex    column index used as map key
 * @param valueIndex  column index (or indexes) used as map value; a failing lookup is
 *                    passed through tryQuietly instead of aborting the read
 * @return map of key column to value column(s)
 */
def csv(f, delim, keyIndex, valueIndex) {
	def values = [:]
	if (!f.isFile()) {
		return values
	}

	f.splitEachLine(delim, 'UTF-8') { columns ->
		values[columns[keyIndex]] = tryQuietly{ columns[valueIndex] }
	}
	return values
}
2013-08-10 05:23:14 +00:00
2016-03-12 13:46:42 +00:00
/* ------------------------------------------------------------------------- */
// stop here when the script is run with mode "no-index": none of the indexes below are built
if ('no-index' == _args.mode) {
	return
}
2014-03-09 12:50:03 +00:00
/* ------------------------------------------------------------------------- */
// BUILD moviedb index

// collect [imdb id, name, year] for all sufficiently relevant rows of the OMDb dump
def omdb = []
new File('omdbMovies.txt').eachLine('Windows-1252'){ text ->
	def line = text.split(/\t/)

	// only rows with more than 11 columns, a numeric first column and a 4-digit year
	def wellFormed = line.length > 11 && line[0] ==~ /\d+/ && line[3] ==~ /\d{4}/
	if (!wellFormed) {
		return
	}

	def imdbid = line[1].substring(2).toInteger()
	def name = line[2].replaceAll(/\s+/, ' ').trim()
	def year = line[3].toInteger()
	def runtime = line[5]
	def genres = line[6]
	// rating and votes default to 0 if the column is missing or not a number
	def rating = tryQuietly{ line[12].toFloat() } ?: 0
	def votes = tryQuietly{ line[13].replaceAll(/\D/, '').toInteger() } ?: 0

	// never index shorts or entries with few votes or a very low rating
	def rejected = genres =~ /Short/ || votes <= 100 || rating <= 2
	// since 1970: needs a plausible runtime or at least 1000 votes
	def modern = year >= 1970 && (runtime =~ /(\d.h)|(\d{2,3}.min)/ || votes >= 1000)
	// since 1950: needs at least 20000 votes
	def classic = year >= 1950 && votes >= 20000

	if (!rejected && (modern || classic)) {
		omdb << [imdbid.pad(7), name, year]
	}
}

// keep only ids that fit into 7 digits and names that pass isValidMovieName
omdb = omdb.findAll{ entry -> (entry[0] as int) <= 9999999 && isValidMovieName(entry[1]) }
2013-08-10 05:23:14 +00:00
2013-11-20 10:07:25 +00:00
// previously synced rows, keyed by column 1 (imdb id), valued by columns 0..-1 of the row
def tmdb_txt = new File('tmdb.txt')
def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1])

def tmdb = []
omdb.each{ m ->
	def sync = System.currentTimeMillis()
	def imdbid = m[0]

	// reuse the cached row while it is fresh enough:
	// 360 days for movies from before 2000, 120 days for everything else
	if (tmdb_index.containsKey(imdbid)) {
		def age = sync - tmdb_index[imdbid][0].toLong()
		def maxAgeDays = m[2].toInteger() < 2000 ? 360 : 120
		if (age < maxAgeDays * 24 * 60 * 60 * 1000L) {
			tmdb << tmdb_index[imdbid]
			return
		}
	}

	try {
		def info = WebServices.TheMovieDB.getMovieInfo("tt${imdbid}", Locale.ENGLISH, true)

		if (info.votes <= 1 || info.rating <= 2) {
			throw new IllegalArgumentException('Insufficient movie data: ' + info)
		}

		def names = [info.name, info.originalName] + info.alternativeTitles

		// one row per distinct year (TMDb release year and OMDb year)
		def years = [info?.released?.year, m[2]].findResults{ it?.toInteger() }.unique()
		years.each{ y ->
			def row = [sync, imdbid.pad(7), info.id.pad(7), y.pad(4)] + names
			println row
			tmdb << row
		}
	} catch(IllegalArgumentException | FileNotFoundException e) {
		printException(e, false)

		// remember the failed lookup as a row with tmdb id 0 and only the OMDb year and name
		def row = [sync, imdbid.pad(7), 0, m[2], m[1]]
		println row
		tmdb << row
	}
}

// persist all rows as tab-separated lines
tmdb.collect{ row -> row.join('\t') }.join('\n').saveAs(tmdb_txt)
2013-11-20 10:07:25 +00:00
// turn synced rows into index rows: imdb id, tmdb id, year, names...
movies = tmdb.findResults{ row ->
	def ity = row[1..3] // imdb id, tmdb id, year
	def names = getNamePermutations(row[4..-1]).findAll{ name -> isValidMovieName(name) }

	// drop rows without a positive imdb id, a positive tmdb id or any usable name
	def usable = ity[0].toInteger() > 0 && ity[1].toInteger() > 0 && names.size() > 0
	usable ? ity + names : null
}

// sort by "<first name> <year>", ignoring case
movies = treeSort(movies, { row -> row[3, 2].join(' ') })

// sanity check
if (movies.size() < 20000) {
	die('Movie index sanity failed:' + movies.size())
}
pack(moviedb_out, movies.collect{ row -> row.join('\t') })
2013-11-20 10:07:25 +00:00
2013-08-10 05:23:14 +00:00
2013-11-20 10:07:25 +00:00
/* ------------------------------------------------------------------------- */
2013-08-10 05:23:14 +00:00
2013-11-20 10:07:25 +00:00
// BUILD tvdb index

// load the cache from the previous run, keyed by series id
// columns: 0 update time, 1 series id, 2 imdb id, 3 rating, 4 votes, 5-n names
def tvdb_txt = new File('tvdb.txt')
def tvdb = [:]

if (tvdb_txt.exists()) {
	tvdb_txt.eachLine('UTF-8'){ text ->
		def line = text.split('\t').toList()
		def names = line.subList(5, line.size())

		def id = line[1] as Integer
		def head = [line[0] as Long, id, line[2], line[3] as Float, line[4] as Float]
		tvdb.put(id, head + names)
	}
}
2014-08-16 03:07:51 +00:00
// collect the <Series><id>..</id><time>..</time></Series> records, sorted by series id
def tvdb_updates = [:] as TreeMap
new File('updates_all.xml').eachLine('UTF-8'){ text ->
	def m = (text =~ '<Series><id>(\\d+)</id><time>(\\d+)</time></Series>')
	// a single line may contain any number of records
	while (m.find()) {
		def id = m.group(1) as Integer
		def time = m.group(2) as Integer
		tvdb_updates[id] = [id: id, time: time]
	}
}

// blacklist crap entries
[219901, 256135].each{ id -> tvdb_updates.remove(id) }
2014-12-05 16:21:13 +00:00
2014-08-14 17:29:34 +00:00
2014-08-16 02:40:39 +00:00
// refresh every series that is not cached yet or that has a newer update record
tvdb_updates.values().each{ update ->
	def cached = tvdb[update.id]
	if (cached != null && !(update.time > cached[0])) {
		return
	}

	try {
		// NOTE(review): retry(2, 60000) presumably means 2 retries with a 60000 ms wait; confirm against the helper
		retry(2, 60000) {
			def seriesNames = []

			def xml = new XmlSlurper().parse("http://thetvdb.com/api/BA864DEE427E384A/series/${update.id}/en.xml")
			def imdbid = xml.Series.IMDB_ID.text()
			seriesNames += xml.Series.SeriesName.text()

			// null if the value is missing or not a number
			def rating = tryQuietly{ xml.Series.Rating.text().toFloat() }
			def votes = tryQuietly{ xml.Series.RatingCount.text().toFloat() }

			// only retrieve additional data for reasonably popular shows
			if (votes >= 5 && rating >= 4) {
				// add the OMDb name if the series has an imdb id
				tryLogCatch{
					if (imdbid =~ /tt(\d+)/) {
						seriesNames += OMDb.getMovieDescriptor(new Movie(null, 0, imdbid.match(/tt(\d+)/) as int, -1), Locale.ENGLISH).getName()
					}
				}

				// scrape extra alias titles from webpage (not supported yet by API)
				def page = org.jsoup.Jsoup.connect("http://thetvdb.com/?tab=series&id=${update.id}").get()

				// alias table: first cell of every row that has a cell reading exactly "en"
				def akaseries = page.select('#akaseries table tr table tr')
						.findAll{ tr -> tr.select('td').any{ td -> td.text() ==~ /en/ } }
						.findResults{ tr -> tr.select('td').first().text() }
						.findAll{ title -> title?.length() > 0 }

				// series form: values of the SeriesName inputs, ordered by the number in the input name
				def intlseries = page.select('#seriesform input')
						.findAll{ input -> input.attr('name') =~ /SeriesName/ }
						.sort{ input -> input.attr('name').match(/\d+/) as int }
						.collect{ input -> input.attr('value') }
						.findAll{ title -> title?.length() > 0 }

				println "Scraped data $akaseries and $intlseries for series $seriesNames"
				seriesNames += akaseries
				seriesNames += intlseries
			}

			def usableNames = seriesNames.findAll{ name -> name != null && name.length() > 0 }
			def data = [update.time, update.id, imdbid, rating ?: 0, votes ?: 0] + usableNames
			tvdb.put(update.id, data)
			println "Update $update => $data"
		}
	}
	catch(Throwable e) {
		printException(e, false)

		// record the failure as an entry without imdb id, rating, votes or names
		def data = [update.time, update.id, '', 0, 0]
		tvdb.put(update.id, data)
		println "Update $update => $data"
	}
}
2013-12-02 18:25:06 +00:00
// remove entries that have become invalid (series id no longer present in the updates)
def staleIds = tvdb.keySet().toList().findAll{ id -> tvdb_updates[id] == null }
staleIds.each{ id ->
	println "Invalid ID found: ${tvdb[id]}"
	tvdb.remove(id)
}

// persist the cache as tab-separated lines; tabs inside values are removed and values are trimmed
def cacheLines = tvdb.values().findResults{ row ->
	row.collect{ value -> value.toString().replace('\t', '').trim() }.join('\t')
}
cacheLines.join('\n').saveAs(tvdb_txt)
2013-08-10 05:23:14 +00:00
// collect [series id, name] pairs for all sufficiently rated series
def thetvdb_index = []
tvdb.values().each{ r ->
	def tvdb_id = r[1]
	def rating = r[3]
	def votes = r[4]
	def names = r.subList(5, r.size())

	// the fewer votes a series has, the higher its rating needs to be
	if ((votes >= 5 && rating >= 4) || (votes >= 2 && rating >= 6) || (votes >= 1 && rating >= 10)) {
		getNamePermutations(names).each{ n ->
			thetvdb_index << [tvdb_id, n]
		}
	}
}

// additional custom mappings: <known series name> <TAB> <alias>
new File("${dir_data}/add-series-alias.txt").splitEachLine(/\t+/, 'UTF-8') { row ->
	// find an index entry with the given series name that does not already carry the alias
	def se = thetvdb_index.find{ row[0] == it[1] && !it.contains(row[1]) }
	if (se == null) die("Unable to find series '${row[0]}': '${row[1]}'")

	thetvdb_index << [se[0], row[1]]
}

// normalize whitespace, then drop placeholder names, names containing 6 or more consecutive
// digits, names starting or ending with '*' and names shorter than 2 characters
thetvdb_index = thetvdb_index.findResults{ [it[0] as Integer, it[1].replaceAll(/\s+/, ' ').trim()] }.findAll{ entry ->
	def name = entry[1]
	!(name =~ /(?i:duplicate|Series.Not.Permitted)/ || name =~ /\d{6,}/ || name.startsWith('*') || name.endsWith('*') || name.length() < 2)
}

// order by series id
thetvdb_index = thetvdb_index.sort{ a, b -> a[0] <=> b[0] }

// join and sort: one line per series id, padded id followed by its names (unique ignoring case)
def thetvdb_txt = thetvdb_index.groupBy{ it[0] }.findResults{ k, v ->
	([k.pad(6)] + v*.getAt(1).unique{ it.toLowerCase() }).join('\t')
}

// sanity check
if (thetvdb_txt.size() < 4000) {
	die('TheTVDB index sanity failed: ' + thetvdb_txt.size())
}
pack(thetvdb_out, thetvdb_txt)
2013-08-10 05:23:14 +00:00
2013-11-20 10:07:25 +00:00
/* ------------------------------------------------------------------------- */
2013-08-10 05:23:14 +00:00
2015-05-24 22:54:59 +00:00
// BUILD osdb index
def osdb = []
new File('osdb.txt').eachLine('UTF-8'){ text ->
	def fields = text.split(/\t/)*.trim()

	// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
	def wellFormed = fields.size() == 6 && fields[1] ==~ /\d+/ && fields[3] ==~ /\d{4}/
	if (!wellFormed) {
		return
	}

	// movies and tv series from 1970 onwards with a valid name and a priority of at least 500
	def accepted = fields[4] ==~ /movie|tv.series/ && isValidMovieName(fields[2]) && (fields[3] as int) >= 1970 && (fields[5] as int) >= 500
	if (!accepted) {
		return
	}

	def kind = '?'
	if (fields[4] == 'movie') {
		kind = 'm'
	} else if (fields[4] == 'tv series') {
		kind = 's'
	}

	// 0 imdbid, 1 name, 2 year, 3 kind, 4 priority
	osdb << [fields[1] as int, fields[2], fields[3] as int, kind, fields[5] as int]
}

// sort reverse by score
osdb.sort{ a, b -> b[4] <=> a[4] }

// reset score/priority because it's currently not used
osdb.each{ entry -> entry[4] = 0 }
2015-05-25 08:28:38 +00:00
// map by imdbid: tvdb cache entries that have an imdb id, keyed by the padded numeric part
def tvdb_index = tvdb.values().findAll{ it[2] =~ /tt(\d+)/ }.collectEntries{ [it[2].substring(2).pad(7), it] }

// collect final output data
osdb = osdb.findResults{ entry ->
	def names = [entry[1]]
	def key = entry[0].pad(7)

	// add the names known from the movie index (kind m) or the series cache (kind s)
	switch (entry[3]) {
		case 'm':
			def tmdb_entry = tmdb_index[key]
			if (tmdb_entry != null && tmdb_entry.size() > 4) {
				names += tmdb_entry[4..-1]
			}
			break
		case 's':
			def tvdb_entry = tvdb_index[key]
			if (tvdb_entry != null && tvdb_entry.size() > 5) {
				names += tvdb_entry[5..-1]
			}
			break
	}

	// 0 kind, 1 score, 2 imdbid, 3 year, 4-n names
	return [entry[3], entry[4], entry[0], entry[2]] + names.unique()
}

// sanity check
if (osdb.size() < 15000) {
	die('OSDB index sanity failed:' + osdb.size())
}
pack(osdb_out, osdb.collect{ row -> row.join('\t') })
2015-05-25 08:41:33 +00:00
/* ------------------------------------------------------------------------- */
// BUILD anidb index
def anidb = new AnidbClient('filebot', 6).getAnimeTitles()

// exclude anime movies from anime index:
// anime-list.xml lines that are mapped to tvdbid="movie" or that carry an imdb id (tt followed by digits)
def animeExcludes = new HashSet()
new File('anime-list.xml').eachLine('UTF-8') { line ->
	if (line =~ /tvdbid="movie"/ || line =~ /imdbid="tt\d+"/) {
		animeExcludes << line.match(/anidbid="(\d+)"/).toInteger()
	}
}

def anidb_index = anidb.findResults{ anime ->
	if (animeExcludes.contains(anime.animeId))
		return null

	// normalize whitespace, then replace every run of apostrophe-like characters with a plain '
	// (the character class must not contain whitespace, or word separators would be replaced as well)
	def names = anime.effectiveNames*.replaceAll(/\s+/, ' ')*.trim()*.replaceAll(/['`´‘’ʻ]+/, /'/)
	names = getNamePermutations(names)

	// ignore names that are empty after release info has been stripped
	names = names.findAll{ name -> stripReleaseInfo(name)?.length() > 0 }

	// padded anime id followed by at most 4 names
	return names.empty ? null : [anime.animeId.pad(5)] + names.take(4)
}

// join and sort
def anidb_txt = anidb_index.findResults{ row -> row.join('\t') }.sort().unique()

// sanity check
if (anidb_txt.size() < 8000 || animeExcludes.size() < 500) {
	die('AniDB index sanity failed:' + anidb_txt.size())
}
pack(anidb_out, anidb_txt)