* added extra release info
This commit is contained in:
parent
59b67d7910
commit
ef80b0ec10
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
def sortRegexList(path) {
|
def sortRegexList(path) {
|
||||||
def set = new TreeSet(String.CASE_INSENSITIVE_ORDER)
|
def set = new TreeSet(String.CASE_INSENSITIVE_ORDER)
|
||||||
new File(path).eachLine{
|
new File(path).eachLine('UTF-8'){
|
||||||
// check if regex compiles
|
// check if regex compiles
|
||||||
set += java.util.regex.Pattern.compile(it).pattern()
|
set += java.util.regex.Pattern.compile(it).pattern()
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ def movies_out = new File("website/data/movies.txt.gz")
|
||||||
|
|
||||||
def gz(file, lines) {
|
def gz(file, lines) {
|
||||||
file.withOutputStream{ out ->
|
file.withOutputStream{ out ->
|
||||||
new java.util.zip.GZIPOutputStream(out).withWriter('utf-8'){ writer ->
|
new java.util.zip.GZIPOutputStream(out).withWriter('UTF-8'){ writer ->
|
||||||
lines.each{ writer.append(it).append('\n') }
|
lines.each{ writer.append(it).append('\n') }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -91,17 +91,22 @@ println "Movie Count: " + movies.size()
|
||||||
|
|
||||||
|
|
||||||
// BUILD series.list.gz
|
// BUILD series.list.gz
|
||||||
|
|
||||||
|
// TheTVDB
|
||||||
def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')
|
def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')
|
||||||
def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8')
|
def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8')
|
||||||
.depthFirst().TABLE.find{it['@id'] == "listtable"}
|
.depthFirst().TABLE.find{it['@id'] == "listtable"}
|
||||||
.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'}
|
.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'}
|
||||||
.findResults{ it.TD[0].A.text() }
|
.findResults{ it.TD[0].A.text() }
|
||||||
|
|
||||||
|
// AniDB
|
||||||
def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] }
|
|
||||||
def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten()
|
def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten()
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
// IMDb series list
|
||||||
|
def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] }
|
||||||
|
|
||||||
|
// Dokuwiki list
|
||||||
def dokuwiki_index = new URL('http://docuwiki.net/postbot/getList.php?subject=Name')
|
def dokuwiki_index = new URL('http://docuwiki.net/postbot/getList.php?subject=Name')
|
||||||
def doku_names = []
|
def doku_names = []
|
||||||
dokuwiki_index.getText('UTF-8').eachLine{
|
dokuwiki_index.getText('UTF-8').eachLine{
|
||||||
|
@ -109,7 +114,7 @@ dokuwiki_index.getText('UTF-8').eachLine{
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
def names = [thetvdb_names, imdb_series_names, anidb_names]
|
def names = [thetvdb_names, anidb_names]
|
||||||
names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check
|
names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check
|
||||||
names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names
|
names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,6 @@
|
||||||
(?-i:CLASSIC|CLASSiC)
|
|
||||||
(?-i:ENGLISH)
|
(?-i:ENGLISH)
|
||||||
(?-i:FRENCH)
|
(?-i:FRENCH)
|
||||||
(?-i:GERMAN)
|
(?-i:GERMAN)
|
||||||
(?-i:LAB)
|
|
||||||
(?-i:LIMITED|LiMiTED)
|
|
||||||
(?-i:SPANISH)
|
(?-i:SPANISH)
|
||||||
(?-i:SWEDISH|SWEDiSH)
|
(?-i:SWEDISH|SWEDiSH)
|
||||||
.+sample$
|
.+sample$
|
||||||
|
@ -19,13 +16,13 @@
|
||||||
^AUDIO_TS$
|
^AUDIO_TS$
|
||||||
^BDMV$
|
^BDMV$
|
||||||
^Cover
|
^Cover
|
||||||
|
^download[s]?$
|
||||||
^DVD
|
^DVD
|
||||||
^Film[s]?
|
^Film[s]?
|
||||||
^HVDVD_TS$
|
^HVDVD_TS$
|
||||||
^Movie[s]?
|
^Movie[s]?
|
||||||
^new$
|
^new$
|
||||||
^other$
|
^other$
|
||||||
^SAMPLE
|
|
||||||
^Season.[0-9]+
|
^Season.[0-9]+
|
||||||
^Torrents[s]?
|
^Torrents[s]?
|
||||||
^Tracker
|
^Tracker
|
||||||
|
@ -43,6 +40,7 @@ CBC
|
||||||
CD[0]?[1-3]
|
CD[0]?[1-3]
|
||||||
Channel.4
|
Channel.4
|
||||||
Channel.5
|
Channel.5
|
||||||
|
CLASSIC
|
||||||
CN
|
CN
|
||||||
CVCD
|
CVCD
|
||||||
DC
|
DC
|
||||||
|
@ -83,6 +81,8 @@ k.tk.crew
|
||||||
KIDZCORNER
|
KIDZCORNER
|
||||||
KOR
|
KOR
|
||||||
KORSUB
|
KORSUB
|
||||||
|
LAB
|
||||||
|
LIMITED
|
||||||
LMAO
|
LMAO
|
||||||
Los.Sustitutos
|
Los.Sustitutos
|
||||||
mkvonly
|
mkvonly
|
||||||
|
|
|
@ -158,6 +158,7 @@ danger2u
|
||||||
danirl
|
danirl
|
||||||
Danny
|
Danny
|
||||||
Darkside.RG
|
Darkside.RG
|
||||||
|
DARKTIGER
|
||||||
DARM
|
DARM
|
||||||
DASH
|
DASH
|
||||||
DAW
|
DAW
|
||||||
|
@ -173,6 +174,7 @@ DEFUSED
|
||||||
DEiTY
|
DEiTY
|
||||||
DEPRAViTY
|
DEPRAViTY
|
||||||
DEPRiVED
|
DEPRiVED
|
||||||
|
desnsurrender
|
||||||
DETAiLS
|
DETAiLS
|
||||||
DEViSE
|
DEViSE
|
||||||
DEWSTRR
|
DEWSTRR
|
||||||
|
@ -197,6 +199,7 @@ DMT
|
||||||
DnB
|
DnB
|
||||||
DNL
|
DNL
|
||||||
DNR
|
DNR
|
||||||
|
dominion
|
||||||
DOMiNO
|
DOMiNO
|
||||||
DON
|
DON
|
||||||
Donatello
|
Donatello
|
||||||
|
@ -306,6 +309,7 @@ Goblin10
|
||||||
Gogeta
|
Gogeta
|
||||||
GoLDSToNE
|
GoLDSToNE
|
||||||
GOTHiC
|
GOTHiC
|
||||||
|
Gothicmaster
|
||||||
greenbud1969
|
greenbud1969
|
||||||
GREiD
|
GREiD
|
||||||
GriOTS
|
GriOTS
|
||||||
|
@ -386,6 +390,7 @@ IMF
|
||||||
IMMERSE
|
IMMERSE
|
||||||
imNaKeD
|
imNaKeD
|
||||||
iMSORNY
|
iMSORNY
|
||||||
|
iNCiTE
|
||||||
iND
|
iND
|
||||||
iNFAMOUS
|
iNFAMOUS
|
||||||
iNGOT
|
iNGOT
|
||||||
|
@ -409,6 +414,7 @@ JFKXVID
|
||||||
JJH
|
JJH
|
||||||
JoLLyRoGeR
|
JoLLyRoGeR
|
||||||
Jozzep
|
Jozzep
|
||||||
|
JunkyCez
|
||||||
K-F
|
K-F
|
||||||
k2
|
k2
|
||||||
KaKa
|
KaKa
|
||||||
|
@ -437,6 +443,8 @@ leetay
|
||||||
LEGi0N
|
LEGi0N
|
||||||
LEVERAGE
|
LEVERAGE
|
||||||
LEViTY
|
LEViTY
|
||||||
|
LGLuX
|
||||||
|
lilwoodenboy
|
||||||
LiMiTED
|
LiMiTED
|
||||||
LiPAN
|
LiPAN
|
||||||
LMAO
|
LMAO
|
||||||
|
@ -620,6 +628,7 @@ REAVERS
|
||||||
Redµx
|
Redµx
|
||||||
REFiNED
|
REFiNED
|
||||||
RELOADED
|
RELOADED
|
||||||
|
Repivx
|
||||||
Republic
|
Republic
|
||||||
REPULSiON
|
REPULSiON
|
||||||
RETRO
|
RETRO
|
||||||
|
@ -819,6 +828,7 @@ WHiiZz
|
||||||
WiDE
|
WiDE
|
||||||
WiKi
|
WiKi
|
||||||
WiRE
|
WiRE
|
||||||
|
WLF
|
||||||
WLM
|
WLM
|
||||||
WoLF
|
WoLF
|
||||||
Wolky
|
Wolky
|
||||||
|
|
Loading…
Reference in New Issue