* added extra release info

This commit is contained in:
Reinhard Pointner 2012-07-16 02:36:49 +00:00
parent 59b67d7910
commit ef80b0ec10
3 changed files with 24 additions and 9 deletions

View File

@ -3,7 +3,7 @@
def sortRegexList(path) {
def set = new TreeSet(String.CASE_INSENSITIVE_ORDER)
new File(path).eachLine{
new File(path).eachLine('UTF-8'){
// check if regex compiles
set += java.util.regex.Pattern.compile(it).pattern()
}
@ -26,7 +26,7 @@ def movies_out = new File("website/data/movies.txt.gz")
def gz(file, lines) {
file.withOutputStream{ out ->
new java.util.zip.GZIPOutputStream(out).withWriter('utf-8'){ writer ->
new java.util.zip.GZIPOutputStream(out).withWriter('UTF-8'){ writer ->
lines.each{ writer.append(it).append('\n') }
}
}
@ -91,17 +91,22 @@ println "Movie Count: " + movies.size()
// BUILD series.list.gz
// TheTVDB
def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')
def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8')
.depthFirst().TABLE.find{it['@id'] == "listtable"}
.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'}
.findResults{ it.TD[0].A.text() }
def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] }
// AniDB
def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten()
/*
// IMDb series list
def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] }
// Docuwiki list (docuwiki.net)
def dokuwiki_index = new URL('http://docuwiki.net/postbot/getList.php?subject=Name')
def doku_names = []
dokuwiki_index.getText('UTF-8').eachLine{
@ -109,7 +114,7 @@ dokuwiki_index.getText('UTF-8').eachLine{
}
*/
def names = [thetvdb_names, imdb_series_names, anidb_names]
def names = [thetvdb_names, anidb_names]
// Each source list is checked on its own, before flattening, so that one empty (failed) source is not hidden by the others.
names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check
// Merge all source lists, keep only names that start with A-Z or 0-9 and contain at least three consecutive
// \p{Alpha} characters, then run each through normalizePunctuation (findResults drops null results).
names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names

View File

@ -1,9 +1,6 @@
(?-i:CLASSIC|CLASSiC)
(?-i:ENGLISH)
(?-i:FRENCH)
(?-i:GERMAN)
(?-i:LAB)
(?-i:LIMITED|LiMiTED)
(?-i:SPANISH)
(?-i:SWEDISH|SWEDiSH)
.+sample$
@ -19,13 +16,13 @@
^AUDIO_TS$
^BDMV$
^Cover
^download[s]?$
^DVD
^Film[s]?
^HVDVD_TS$
^Movie[s]?
^new$
^other$
^SAMPLE
^Season.[0-9]+
^Torrents[s]?
^Tracker
@ -43,6 +40,7 @@ CBC
CD[0]?[1-3]
Channel.4
Channel.5
CLASSIC
CN
CVCD
DC
@ -83,6 +81,8 @@ k.tk.crew
KIDZCORNER
KOR
KORSUB
LAB
LIMITED
LMAO
Los.Sustitutos
mkvonly

View File

@ -158,6 +158,7 @@ danger2u
danirl
Danny
Darkside.RG
DARKTIGER
DARM
DASH
DAW
@ -173,6 +174,7 @@ DEFUSED
DEiTY
DEPRAViTY
DEPRiVED
desnsurrender
DETAiLS
DEViSE
DEWSTRR
@ -197,6 +199,7 @@ DMT
DnB
DNL
DNR
dominion
DOMiNO
DON
Donatello
@ -306,6 +309,7 @@ Goblin10
Gogeta
GoLDSToNE
GOTHiC
Gothicmaster
greenbud1969
GREiD
GriOTS
@ -386,6 +390,7 @@ IMF
IMMERSE
imNaKeD
iMSORNY
iNCiTE
iND
iNFAMOUS
iNGOT
@ -409,6 +414,7 @@ JFKXVID
JJH
JoLLyRoGeR
Jozzep
JunkyCez
K-F
k2
KaKa
@ -437,6 +443,8 @@ leetay
LEGi0N
LEVERAGE
LEViTY
LGLuX
lilwoodenboy
LiMiTED
LiPAN
LMAO
@ -620,6 +628,7 @@ REAVERS
Redµx
REFiNED
RELOADED
Repivx
Republic
REPULSiON
RETRO
@ -819,6 +828,7 @@ WHiiZz
WiDE
WiKi
WiRE
WLF
WLM
WoLF
Wolky