2012-02-26 17:02:54 +00:00
|
|
|
// filebot -script BuildData.groovy -trust-script
|
2012-02-24 13:39:32 +00:00
|
|
|
|
2012-02-26 17:02:54 +00:00
|
|
|
// Output file for the series name index; the relative path is resolved against the working directory.
def s_out = new File("website/data/series.list.gz")
|
|
|
|
// Output file for the movie index; the relative path is resolved against the working directory.
def m_out = new File("website/data/movies.txt.gz")
|
|
|
|
|
|
|
|
/**
 * Write each element of `lines` to `file` as GZIP-compressed UTF-8 text,
 * terminating every element with a single '\n'.
 *
 * Any existing content of `file` is replaced. Both the GZIP stream and the
 * underlying file stream are closed when the enclosing closures return.
 *
 * @param file   the target file
 * @param lines  the text lines to write, in iteration order
 * @return the `lines` argument itself
 */
def gz(file, lines) {
    file.withOutputStream { rawStream ->
        def zipStream = new java.util.zip.GZIPOutputStream(rawStream)
        zipStream.withWriter('utf-8') { sink ->
            for (entry in lines) {
                sink.append(entry)
                sink.append('\n')
            }
            // hand the input back so the call evaluates to the same value as before
            lines
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// ------------------------------------------------------------------------- //
|
|
|
|
|
|
|
|
|
|
|
|
// BUILD movies.txt.gz
|
|
|
|
// Tab-separated movie export that the movie index is built from.
def tsv = new URL("http://www.opensubtitles.org/addons/export_movie.php")
def movies = []

// NOTE(review): tsv.text decodes the response with the platform default charset,
// while the output is written as UTF-8 -- confirm the encoding the server actually sends.
tsv.text.eachLine{
    // split the row into columns, collapse whitespace runs to one space and trim each column
    def line = it.split(/\t/)*.replaceAll(/\s+/, ' ')*.trim()

    // Accept only complete 4-column rows. Columns 1 and 3 are converted to integers below,
    // so they must be checked too; otherwise a single malformed row would abort the whole
    // build with a NumberFormatException instead of just being skipped.
    if (line.size() == 4 && line[0] =~ /\d+/ && line[1].isInteger() && line[3].isInteger()) {
        // stored as [number, name, number] -- presumably [id, title, year]; verify against the export format
        movies.add([line[1].toInteger(), line[2], line[3].toInteger()])
    }
}

// Keep entries whose first value has at most 7 digits, whose last value is at least 1930,
// and whose name starts with an upper-case ASCII letter or a digit and contains at least
// 3 consecutive letters; then order the result by name.
movies = movies.findAll{ it[0] <= 9999999 && it[2] >= 1930 && it[1] =~ /^[A-Z0-9]/ && it[1] =~ /[\p{Alpha}]{3}/ }.sort{ it[1] }

// One tab-separated output line per movie; pad(7) presumably left-pads the number to 7 characters (helper not defined in this file).
gz(m_out, movies.collect{ [it[0].pad(7), it[1], it[2]].join('\t') })
println "Movie Count: " + movies.size()
|
|
|
|
|
|
|
|
|
|
|
|
// ------------------------------------------------------------------------- //
|
|
|
|
|
|
|
|
|
|
|
|
// BUILD series.list.gz
|
2012-02-23 18:48:35 +00:00
|
|
|
// Series listing page that the series names are scraped from.
def page = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')

// Locate the table with id "listtable" anywhere in the parsed page.
def listTable = page.fetch().getHtml('utf-8')
    .depthFirst().TABLE.find{ it['@id'] == "listtable" }

// find{} yields null when no such table exists (e.g. after a page layout change).
// Fall back to an empty list so the check below reports the scrape failure with its
// intended message instead of dying earlier with a NullPointerException.
def names = listTable == null ? [] : listTable
    .depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English' }
    .findResults{ it.TD[0].A.text() }

// Rows are 3-cell rows whose second cell reads 'English'; the name is the link text of the first cell.
if (names.size() == 0) {
    throw new Exception("Failed to scrape series names")
}

// Add the primary title and the 'en' official title of every AniDB entry;
// findResults leaves out entries for which the closure yields null.
def anime = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles()
names += anime.findResults{ it.getPrimaryTitle() }
names += anime.findResults{ it.getOfficialTitle('en') }

// Keep names that start with an upper-case ASCII letter or a digit and contain at least
// 3 consecutive letters, then run each one through normalizePunctuation.
names = names.findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) }

// De-duplicate ignoring case; the TreeSet also leaves the names in case-insensitive order.
def unique = new TreeSet(String.CASE_INSENSITIVE_ORDER)
unique.addAll(names)
names = unique as List

gz(s_out, names)
println "Series Count: " + names.size()
|