* added extra release info

This commit is contained in:
Reinhard Pointner 2012-07-16 02:36:49 +00:00
parent 59b67d7910
commit ef80b0ec10
3 changed files with 24 additions and 9 deletions

View File

@ -3,7 +3,7 @@
def sortRegexList(path) { def sortRegexList(path) {
def set = new TreeSet(String.CASE_INSENSITIVE_ORDER) def set = new TreeSet(String.CASE_INSENSITIVE_ORDER)
new File(path).eachLine{ new File(path).eachLine('UTF-8'){
// check if regex compiles // check if regex compiles
set += java.util.regex.Pattern.compile(it).pattern() set += java.util.regex.Pattern.compile(it).pattern()
} }
@ -26,7 +26,7 @@ def movies_out = new File("website/data/movies.txt.gz")
def gz(file, lines) { def gz(file, lines) {
file.withOutputStream{ out -> file.withOutputStream{ out ->
new java.util.zip.GZIPOutputStream(out).withWriter('utf-8'){ writer -> new java.util.zip.GZIPOutputStream(out).withWriter('UTF-8'){ writer ->
lines.each{ writer.append(it).append('\n') } lines.each{ writer.append(it).append('\n') }
} }
} }
@ -91,17 +91,22 @@ println "Movie Count: " + movies.size()
// BUILD series.list.gz // BUILD series.list.gz
// TheTVDB
def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search') def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')
def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8') def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8')
.depthFirst().TABLE.find{it['@id'] == "listtable"} .depthFirst().TABLE.find{it['@id'] == "listtable"}
.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'} .depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'}
.findResults{ it.TD[0].A.text() } .findResults{ it.TD[0].A.text() }
// AniDB
def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] }
def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten() def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten()
/* /*
// IMDb series list
def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] }
// Dokuwiki list
def dokuwiki_index = new URL('http://docuwiki.net/postbot/getList.php?subject=Name') def dokuwiki_index = new URL('http://docuwiki.net/postbot/getList.php?subject=Name')
def doku_names = [] def doku_names = []
dokuwiki_index.getText('UTF-8').eachLine{ dokuwiki_index.getText('UTF-8').eachLine{
@ -109,7 +114,7 @@ dokuwiki_index.getText('UTF-8').eachLine{
} }
*/ */
def names = [thetvdb_names, imdb_series_names, anidb_names] def names = [thetvdb_names, anidb_names]
names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check
names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names

View File

@ -1,9 +1,6 @@
(?-i:CLASSIC|CLASSiC)
(?-i:ENGLISH) (?-i:ENGLISH)
(?-i:FRENCH) (?-i:FRENCH)
(?-i:GERMAN) (?-i:GERMAN)
(?-i:LAB)
(?-i:LIMITED|LiMiTED)
(?-i:SPANISH) (?-i:SPANISH)
(?-i:SWEDISH|SWEDiSH) (?-i:SWEDISH|SWEDiSH)
.+sample$ .+sample$
@ -19,13 +16,13 @@
^AUDIO_TS$ ^AUDIO_TS$
^BDMV$ ^BDMV$
^Cover ^Cover
^download[s]?$
^DVD ^DVD
^Film[s]? ^Film[s]?
^HVDVD_TS$ ^HVDVD_TS$
^Movie[s]? ^Movie[s]?
^new$ ^new$
^other$ ^other$
^SAMPLE
^Season.[0-9]+ ^Season.[0-9]+
^Torrents[s]? ^Torrents[s]?
^Tracker ^Tracker
@ -43,6 +40,7 @@ CBC
CD[0]?[1-3] CD[0]?[1-3]
Channel.4 Channel.4
Channel.5 Channel.5
CLASSIC
CN CN
CVCD CVCD
DC DC
@ -83,6 +81,8 @@ k.tk.crew
KIDZCORNER KIDZCORNER
KOR KOR
KORSUB KORSUB
LAB
LIMITED
LMAO LMAO
Los.Sustitutos Los.Sustitutos
mkvonly mkvonly

View File

@ -158,6 +158,7 @@ danger2u
danirl danirl
Danny Danny
Darkside.RG Darkside.RG
DARKTIGER
DARM DARM
DASH DASH
DAW DAW
@ -173,6 +174,7 @@ DEFUSED
DEiTY DEiTY
DEPRAViTY DEPRAViTY
DEPRiVED DEPRiVED
desnsurrender
DETAiLS DETAiLS
DEViSE DEViSE
DEWSTRR DEWSTRR
@ -197,6 +199,7 @@ DMT
DnB DnB
DNL DNL
DNR DNR
dominion
DOMiNO DOMiNO
DON DON
Donatello Donatello
@ -306,6 +309,7 @@ Goblin10
Gogeta Gogeta
GoLDSToNE GoLDSToNE
GOTHiC GOTHiC
Gothicmaster
greenbud1969 greenbud1969
GREiD GREiD
GriOTS GriOTS
@ -386,6 +390,7 @@ IMF
IMMERSE IMMERSE
imNaKeD imNaKeD
iMSORNY iMSORNY
iNCiTE
iND iND
iNFAMOUS iNFAMOUS
iNGOT iNGOT
@ -409,6 +414,7 @@ JFKXVID
JJH JJH
JoLLyRoGeR JoLLyRoGeR
Jozzep Jozzep
JunkyCez
K-F K-F
k2 k2
KaKa KaKa
@ -437,6 +443,8 @@ leetay
LEGi0N LEGi0N
LEVERAGE LEVERAGE
LEViTY LEViTY
LGLuX
lilwoodenboy
LiMiTED LiMiTED
LiPAN LiPAN
LMAO LMAO
@ -620,6 +628,7 @@ REAVERS
Redµx Redµx
REFiNED REFiNED
RELOADED RELOADED
Repivx
Republic Republic
REPULSiON REPULSiON
RETRO RETRO
@ -819,6 +828,7 @@ WHiiZz
WiDE WiDE
WiKi WiKi
WiRE WiRE
WLF
WLM WLM
WoLF WoLF
Wolky Wolky