diff --git a/BuildData.groovy b/BuildData.groovy index 9e901eda..2effb187 100644 --- a/BuildData.groovy +++ b/BuildData.groovy @@ -3,7 +3,7 @@ def sortRegexList(path) { def set = new TreeSet(String.CASE_INSENSITIVE_ORDER) - new File(path).eachLine{ + new File(path).eachLine('UTF-8'){ // check if regex compiles set += java.util.regex.Pattern.compile(it).pattern() } @@ -26,7 +26,7 @@ def movies_out = new File("website/data/movies.txt.gz") def gz(file, lines) { file.withOutputStream{ out -> - new java.util.zip.GZIPOutputStream(out).withWriter('utf-8'){ writer -> + new java.util.zip.GZIPOutputStream(out).withWriter('UTF-8'){ writer -> lines.each{ writer.append(it).append('\n') } } } @@ -91,17 +91,22 @@ println "Movie Count: " + movies.size() // BUILD series.list.gz + +// TheTVDB def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search') def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8') .depthFirst().TABLE.find{it['@id'] == "listtable"} .depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'} .findResults{ it.TD[0].A.text() } - -def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] } +// AniDB def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten() /* +// IMDb series list +def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] } + +// Dokuwiki list def dokuwiki_index = new URL('http://docuwiki.net/postbot/getList.php?subject=Name') def doku_names = [] dokuwiki_index.getText('UTF-8').eachLine{ @@ -109,7 +114,7 @@ dokuwiki_index.getText('UTF-8').eachLine{ } */ -def names = [thetvdb_names, imdb_series_names, anidb_names] +def names = [thetvdb_names, anidb_names] names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ 
/[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names diff --git a/website/data/query-blacklist.txt b/website/data/query-blacklist.txt index d50ed7d9..18b48ce9 100644 --- a/website/data/query-blacklist.txt +++ b/website/data/query-blacklist.txt @@ -1,9 +1,6 @@ -(?-i:CLASSIC|CLASSiC) (?-i:ENGLISH) (?-i:FRENCH) (?-i:GERMAN) -(?-i:LAB) -(?-i:LIMITED|LiMiTED) (?-i:SPANISH) (?-i:SWEDISH|SWEDiSH) .+sample$ @@ -19,13 +16,13 @@ ^AUDIO_TS$ ^BDMV$ ^Cover +^download[s]?$ ^DVD ^Film[s]? ^HVDVD_TS$ ^Movie[s]? ^new$ ^other$ -^SAMPLE ^Season.[0-9]+ ^Torrents[s]? ^Tracker @@ -43,6 +40,7 @@ CBC CD[0]?[1-3] Channel.4 Channel.5 +CLASSIC CN CVCD DC @@ -83,6 +81,8 @@ k.tk.crew KIDZCORNER KOR KORSUB +LAB +LIMITED LMAO Los.Sustitutos mkvonly diff --git a/website/data/release-groups.txt b/website/data/release-groups.txt index ab520a9c..b98ddb5e 100644 --- a/website/data/release-groups.txt +++ b/website/data/release-groups.txt @@ -158,6 +158,7 @@ danger2u danirl Danny Darkside.RG +DARKTIGER DARM DASH DAW @@ -173,6 +174,7 @@ DEFUSED DEiTY DEPRAViTY DEPRiVED +desnsurrender DETAiLS DEViSE DEWSTRR @@ -197,6 +199,7 @@ DMT DnB DNL DNR +dominion DOMiNO DON Donatello @@ -306,6 +309,7 @@ Goblin10 Gogeta GoLDSToNE GOTHiC +Gothicmaster greenbud1969 GREiD GriOTS @@ -386,6 +390,7 @@ IMF IMMERSE imNaKeD iMSORNY +iNCiTE iND iNFAMOUS iNGOT @@ -409,6 +414,7 @@ JFKXVID JJH JoLLyRoGeR Jozzep +JunkyCez K-F k2 KaKa @@ -437,6 +443,8 @@ leetay LEGi0N LEVERAGE LEViTY +LGLuX +lilwoodenboy LiMiTED LiPAN LMAO @@ -620,6 +628,7 @@ REAVERS Redµx REFiNED RELOADED +Repivx Republic REPULSiON RETRO @@ -819,6 +828,7 @@ WHiiZz WiDE WiKi WiRE +WLF WLM WoLF Wolky