+ rebuild movie index with imdb AND tmdb IDs

This commit is contained in:
Reinhard Pointner 2013-11-21 14:31:31 +00:00
parent 57f451025f
commit 746ab459f3
2 changed files with 11 additions and 7 deletions

View File

@ -41,7 +41,7 @@ println "Reviews: " + reviews.size()
/* ------------------------------------------------------------------------- */
def movies_out = new File("website/data/moviedb.txt")
def moviedb_out = new File("website/data/moviedb.txt")
def thetvdb_out = new File("website/data/thetvdb.txt")
def anidb_out = new File("website/data/anidb.txt")
@ -58,6 +58,10 @@ def pack(file, lines) {
// BUILD moviedb index
/**
 * Decides whether a movie name is acceptable for the movie index.
 *
 * A name is accepted when both of the following hold:
 *   1. it begins with an uppercase letter A-Z or a digit 0-9, and
 *   2. it contains at least three consecutive \p{Alpha} characters somewhere.
 *
 * @param s the candidate movie name
 * @return true if both patterns are found in s, false otherwise
 */
def isValidMovieName(s) {
	// reject early unless the first character is an uppercase letter or a digit
	def leadingCharMatcher = (s =~ /^[A-Z0-9]/)
	if (!leadingCharMatcher) {
		return false
	}

	// a Matcher coerces to true when the pattern is found anywhere in the input
	def letterRunMatcher = (s =~ /[\p{Alpha}]{3}/)
	return letterRunMatcher as boolean
}
def treeSort(list, keyFunction) {
def sorter = new TreeMap(String.CASE_INSENSITIVE_ORDER)
list.each{
@ -67,10 +71,9 @@ def treeSort(list, keyFunction) {
}
def omdb = new TreeSet({ a, b -> a[0].compareTo(b[0]) } as Comparator)
def omdb = []
new File('omdb.txt').eachLine('Windows-1252'){
def line = it.split(/\t/)
if (line.length > 11 && line[0] ==~ /\d+/) {
def imdbid = line[1].substring(2).toInteger()
def name = line[2].replaceAll(/\s+/, ' ').trim()
@ -84,7 +87,6 @@ new File('omdb.txt').eachLine('Windows-1252'){
}
}
}
def isValidMovieName = { s -> s =~ /^[A-Z0-9]/ && s =~ /[\p{Alpha}]{3}/ }
omdb = omdb.findAll{ (it[0] as int) <= 9999999 && isValidMovieName(it[1]) }
@ -109,6 +111,7 @@ def tmdb = omdb.findResults{ m ->
tmdb_txt << row.join('\t') << '\n'
return row
}
tmdb*.join('\t').join('\n').saveAs(tmdb_txt)
movies = tmdb.findResults{
def ity = it[1..3] // imdb id, tmdb id, year
@ -120,11 +123,11 @@ movies = tmdb.findResults{
}
movies = treeSort(movies, { it[3, 2].join(' ') })
pack(movies_out, movies.findResults{ it.join('\t') })
pack(moviedb_out, movies.findResults{ it.join('\t') })
println "Movie Count: " + movies.size()
// sanity check
if (movies.size() < 40000) { throw new Exception('Movie index sanity failed') }
if (movies.size() < 50000) { throw new Exception('Movie index sanity failed') }
/* ------------------------------------------------------------------------- */

View File

@ -1,9 +1,10 @@
options +indexes
redirect 301 /data/movies.txt.xz http://sourceforge.net/projects/filebot/files/data/movies.txt.xz/download
redirect 301 /data/moviedb.txt.xz http://sourceforge.net/projects/filebot/files/data/moviedb.txt.xz/download
redirect 301 /data/thetvdb.txt.xz http://sourceforge.net/projects/filebot/files/data/thetvdb.txt.xz/download
redirect 301 /data/anidb.txt.xz http://sourceforge.net/projects/filebot/files/data/anidb.txt.xz/download
redirect 301 /data/movies.txt.xz http://sourceforge.net/projects/filebot/files/data/movies.txt.xz/download
redirect 301 /data/movies.txt.gz http://sourceforge.net/projects/filebot/files/data/movies.txt.gz/download
redirect 301 /data/thetvdb.txt.gz http://sourceforge.net/projects/filebot/files/data/thetvdb.txt.gz/download
redirect 301 /data/anidb.txt.gz http://sourceforge.net/projects/filebot/files/data/anidb.txt.gz/download