* build my own IMDb index, using OSDb movie data for the IDs and my IMDb scraper to get the original AKA names

* lots of extra release group (RG) names and blacklisted terms (especially useful for documentaries)
* updated the cleaner script to handle video clutter such as sample files, etc.
This commit is contained in:
Reinhard Pointner 2012-07-13 11:41:50 +00:00
parent 7cf02bb235
commit d29fe49390
5 changed files with 243 additions and 44 deletions

View File

@ -1,7 +1,28 @@
// filebot -script BuildData.groovy -trust-script // filebot -script BuildData.groovy
def s_out = new File("website/data/series.list.gz")
def m_out = new File("website/data/movies.txt.gz") def sortRegexList(path) {
def set = new TreeSet(String.CASE_INSENSITIVE_ORDER)
new File(path).eachLine{
// check if regex compiles
set += java.util.regex.Pattern.compile(it).pattern()
}
def out = set.join('\n').saveAs(path)
println "$out\n$out.text\n"
}
// sort and check shared regex collections
sortRegexList("website/data/release-groups.txt")
sortRegexList("website/data/query-blacklist.txt")
// ------------------------------------------------------------------------- //
def series_out = new File("website/data/series.list.gz")
def movies_out = new File("website/data/movies.txt.gz")
def gz(file, lines) { def gz(file, lines) {
file.withOutputStream{ out -> file.withOutputStream{ out ->
@ -15,20 +36,54 @@ def gz(file, lines) {
// ------------------------------------------------------------------------- // // ------------------------------------------------------------------------- //
// BUILD movies.txt.gz // LOAD osdb-imdb.txt (already verified data)
def tsv = new URL("http://www.opensubtitles.org/addons/export_movie.php") def imdb_tsv = new File("website/data/osdb-imdb.txt")
def movies = [] def imdb = [].asSynchronized() // thread-safe list
tsv.text.eachLine{ imdb_tsv.getText('UTF-8').eachLine{
imdb << it.split(/\t/)
}
imdb_ids = new HashSet(imdb.collect{ it[0] })
// BUILD movies.txt.gz
def osdb_tsv = new URL("http://www.opensubtitles.org/addons/export_movie.php")
def osdb = []
osdb_tsv.getText('UTF-8').eachLine{
def line = it.split(/\t/)*.replaceAll(/\s+/, ' ')*.trim() def line = it.split(/\t/)*.replaceAll(/\s+/, ' ')*.trim()
if (line.size() == 4 && line[0] =~ /\d+/) { if (line.size() == 4 && line[0] =~ /\d+/) {
movies.add([line[1].toInteger(), line[2], line[3].toInteger()]) osdb << [line[1].toInteger(), line[2], line[3].toInteger()]
} }
} }
osdb = osdb.findAll{ it[0] <= 9999999 && it[2] >= 1930 && it[1] =~ /^[A-Z0-9]/ && it[1] =~ /[\p{Alpha}]{3}/ }.collect{ [it[0].pad(7), it[1], it[2]] }
movies = movies.findAll{ it[0] <= 9999999 && it[2] >= 1930 && it[1] =~ /^[A-Z0-9]/ && it[1] =~ /[\p{Alpha}]{3}/ }.sort{ it[1] }
gz(m_out, movies.collect{ [it[0].pad(7), it[1], it[2]].join('\t') }) parallel(osdb.collect{ row ->
return {
// update new data
if (!imdb_ids.contains(row[0])) {
def mov = net.sourceforge.filebot.WebServices.IMDb.getMovieDescriptor(row[0] as int, null)
if (mov != null && mov.name.length() > 0 && mov.year > 0) {
println "Adding $mov"
imdb << [row[0], mov.name, mov.year]
} else {
println "Blacklisting $row"
imdb << [row[0], null]
}
}
}
}, 20)
// save updated imdb data
imdb.collect{ it.join('\t') }.join('\n').saveAs(imdb_tsv)
// save movie data
def movies = imdb.findAll{ it.size() >= 3 && !it[1].startsWith('"') }
def movieSorter = new TreeMap(String.CASE_INSENSITIVE_ORDER)
movies.each{ movieSorter.put(it[1], it) }
movies = movieSorter.values().collect{ it.join('\t') }
gz(movies_out, movies)
println "Movie Count: " + movies.size() println "Movie Count: " + movies.size()
@ -36,27 +91,32 @@ println "Movie Count: " + movies.size()
// BUILD series.list.gz // BUILD series.list.gz
def page = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search') def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')
def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8')
def names = page.fetch().getHtml('utf-8')
.depthFirst().TABLE.find{it['@id'] == "listtable"} .depthFirst().TABLE.find{it['@id'] == "listtable"}
.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'} .depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'}
.findResults{ it.TD[0].A.text() } .findResults{ it.TD[0].A.text() }
if (names.size() == 0) {
throw new Exception("Failed to scrape series names") def imdb_series_names = imdb.findAll{ it.size() >= 3 && it[1].startsWith('"') }.collect{ it[1] }
def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten()
/*
def dokuwiki_index = new URL('http://docuwiki.net/postbot/getList.php?subject=Name')
def doku_names = []
dokuwiki_index.getText('UTF-8').eachLine{
doku_names << it.trim().replaceTrailingBrackets()
} }
*/
def anime = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles() def names = [thetvdb_names, imdb_series_names, anidb_names]
names += anime.findResults{ it.getPrimaryTitle() } names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check
names += anime.findResults{ it.getOfficialTitle('en') } names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names
names = names.findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } def seriesSorter = new TreeSet(String.CASE_INSENSITIVE_ORDER)
seriesSorter.addAll(names)
def unique = new TreeSet(String.CASE_INSENSITIVE_ORDER) names = seriesSorter as List
unique.addAll(names)
names = unique as List
gz(s_out, names) gz(series_out, names)
println "Series Count: " + names.size() println "Series Count: " + names.size()

View File

@ -99,7 +99,7 @@ public class IMDbClient implements MovieIdentificationService {
if (header.toUpperCase().contains("(VG)")) // ignore video games if (header.toUpperCase().contains("(VG)")) // ignore video games
return null; return null;
String name = selectString("//H1/A/text()", dom); String name = selectString("//H1/A/text()", dom).replaceAll("\\s+", " ").trim();
String year = new Scanner(selectString("//H1/A/following::A/text()", dom)).useDelimiter("\\D+").next(); String year = new Scanner(selectString("//H1/A/following::A/text()", dom)).useDelimiter("\\D+").next();
String url = selectString("//H1/A/@href", dom); String url = selectString("//H1/A/@href", dom);
return new Movie(name, Pattern.matches("\\d{4}", year) ? Integer.parseInt(year) : -1, getImdbId(url)); return new Movie(name, Pattern.matches("\\d{4}", year) ? Integer.parseInt(year) : -1, getImdbId(url));

View File

@ -2,6 +2,7 @@
(?-i:ENGLISH) (?-i:ENGLISH)
(?-i:FRENCH) (?-i:FRENCH)
(?-i:GERMAN) (?-i:GERMAN)
(?-i:LAB)
(?-i:LIMITED|LiMiTED) (?-i:LIMITED|LiMiTED)
(?-i:SPANISH) (?-i:SPANISH)
(?-i:SWEDISH|SWEDiSH) (?-i:SWEDISH|SWEDiSH)
@ -32,16 +33,23 @@
^VCD$ ^VCD$
^VIDEO_TS$ ^VIDEO_TS$
A.Release.Lounge A.Release.Lounge
ABC
Anime[s]? Anime[s]?
Arte
BBC BBC
btarena.org
By.Cool.Release By.Cool.Release
CBC
CD[0]?[1-3] CD[0]?[1-3]
Channel.4
Channel.5
CN CN
CVCD CVCD
DC DC
Demonoid Demonoid
Director's.Cut Director's.Cut
Directors.Cut Directors.Cut
Discovery.Channel
docu docu
Dual.Audio Dual.Audio
dubbed dubbed
@ -58,8 +66,10 @@ Fra
FRE FRE
GER GER
Hard.Subbed Hard.Subbed
HBO
HDRip HDRip
Hindi Hindi
History.Channel
HQ HQ
info info
iNT iNT
@ -69,6 +79,7 @@ ISO
iTA iTA
iTALIA iTALIA
jigaxx jigaxx
k.tk.crew
KIDZCORNER KIDZCORNER
KOR KOR
KORSUB KORSUB
@ -78,9 +89,15 @@ mkvonly
Movies Movies
MultiSub MultiSub
MVGroup.org MVGroup.org
National.Geographic
NFO
NG
NHK
NL NL
NL.Subs NL.Subs
NLT NLT
o2.pl
PBS
Pre.?DVD Pre.?DVD
PROPER PROPER
PSP PSP
@ -92,8 +109,8 @@ ReRip
RESYNC RESYNC
RETAIL RETAIL
RiffTrax RiffTrax
Sample sample[s]?
sample[s]?$ SBS
Screenshot Screenshot
ShareGo ShareGo
ShareReactor ShareReactor
@ -121,10 +138,7 @@ UNCUT
unrated unrated
unrated.edition unrated.edition
UsaBit.com UsaBit.com
Video[s]? video[s]?
www.speed.cd www[.][\w-.]+[.](com|net|tk|ro|cd)
www.torentz.3xforum.ro
www.Torrenting.com
www[.]
xRipp xRipp
Zune Zune

View File

@ -12,6 +12,7 @@
3LT0N 3LT0N
420RipZ 420RipZ
4HM 4HM
666
7SiNS 7SiNS
850105 850105
a-S a-S
@ -21,6 +22,8 @@ AaS
aBD aBD
AbSurdity AbSurdity
aceford aceford
ACF
AckTiv3
ADHD ADHD
AE AE
AEGiS AEGiS
@ -47,15 +50,18 @@ ARiGOLD
ARROW ARROW
ArtSubs ArtSubs
ASAP ASAP
Atlas47
ATTENTATET ATTENTATET
AVCHD AVCHD
AVS720 AVS720
AW AW
aWake aWake
AXE
aXXo aXXo
AZuRRaY AZuRRaY
babylonad babylonad
BAJSKORV BAJSKORV
BaLD
BamHD BamHD
Barba Barba
BaSS BaSS
@ -64,20 +70,25 @@ bc10
BDClub BDClub
BDiSC BDiSC
beAst beAst
BEEF.STEW
BeStDivX BeStDivX
BestHD BestHD
BiA BiA
BiDA BiDA
Billman424
Blixten
BLOW BLOW
Blu-bits Blu-bits
BluDragon BluDragon
BlueBird BlueBird
blueF blueF
Bluereaper
BlueTV BlueTV
BLUEYES BLUEYES
blueZilla blueZilla
BluWave BluWave
BMB BMB
BoBo
BORGATA BORGATA
bReAK bReAK
BrG BrG
@ -86,13 +97,16 @@ BRMP
BRUTUS BRUTUS
BRZONE BRZONE
BTSD BTSD
BTSFilms
BTT BTT
BugZ BugZ
BULLDOZER BULLDOZER
BUNNY BUNNY
BurnFre
BWB BWB
C4TV C4TV
CAMELOT CAMELOT
catflap
CBGB CBGB
CDD CDD
CDDHD CDDHD
@ -119,25 +133,31 @@ cntc
COALiTiON COALiTiON
Cocksure Cocksure
COMPULSION COMPULSION
Connaz-AKA-MrPirate
cottage cottage
COWiSO COWiSO
CPtScene CPtScene
CPY CPY
CREEDANCE
CRF CRF
CRIMSON CRIMSON
CRiSC CRiSC
CROSSBOW CROSSBOW
CRYS CRYS
CSHD CSHD
CTD
CtrlHD CtrlHD
CTU CTU
CULTHD CULTHD
CuMBuCKeTS CuMBuCKeTS
CYBERMEN CYBERMEN
CyberTyger
D-Z0N3 D-Z0N3
D3Si D3Si
danger2u danger2u
danirl danirl
Danny
Darkside.RG
DARM DARM
DASH DASH
DAW DAW
@ -171,16 +191,22 @@ DiTa
DiVERSiTY DiVERSiTY
DivXNL DivXNL
DivXNL-Team DivXNL-Team
DjRobo38
dmd
DMT DMT
DnB DnB
DNL DNL
DNR DNR
DOMiNO
DON DON
Donatello
DoNE DoNE
DOT DOT
DOUBT DOUBT
Dowcker
DOWN DOWN
DRHD DRHD
DrSn
DUPLI DUPLI
DUQA DUQA
DutchReleaseTeam DutchReleaseTeam
@ -198,6 +224,7 @@ Ekolb
Electri4ka Electri4ka
ELECTRiC ELECTRiC
Electrichka Electrichka
ELiA
elizabethtga elizabethtga
EM0C0RE EM0C0RE
EmC EmC
@ -225,11 +252,13 @@ EXViD
eztv eztv
FaNSuB FaNSuB
FASM FASM
FEAR
FELONY FELONY
FFNDVD FFNDVD
FHD FHD
FHM FHM
FiCO FiCO
FiddleGoose
FiHTV FiHTV
FilmHD FilmHD
FiNaLe FiNaLe
@ -268,6 +297,7 @@ Gazdi
GB GB
GECKOS GECKOS
GEHENNA GEHENNA
Genesis-RG
GFW GFW
GFY GFY
GiNJi GiNJi
@ -276,6 +306,8 @@ Goblin10
Gogeta Gogeta
GoLDSToNE GoLDSToNE
GOTHiC GOTHiC
greenbud1969
GREiD
GriOTS GriOTS
Grond Grond
gudhak gudhak
@ -286,6 +318,7 @@ HaB
HAGGiS HAGGiS
HAiDEAF HAiDEAF
HALCYON HALCYON
Hammer71
HANGOVER HANGOVER
hannibal hannibal
HCA HCA
@ -317,10 +350,15 @@ HDX
HDxT HDxT
Helix Helix
HHH HHH
HHI
HIDD3N
HiDt HiDt
HiFi HiFi
HiGHTIMES HiGHTIMES
HiNT HiNT
Hivrolta
HLS
HNR
HoodBag HoodBag
HORiZON HORiZON
HOWL HOWL
@ -340,6 +378,8 @@ IGUANA
iKA iKA
iLG iLG
iLL iLL
iLLUSiON
imacRuel1
iMAGiNE iMAGiNE
iMBT iMBT
IMF IMF
@ -352,7 +392,9 @@ iNGOT
InSaNiTy InSaNiTy
iNSECTS iNSECTS
iNSPiRED iNSPiRED
IntelliQ
iNTERNAL iNTERNAL
iNTiMiD
INtL INtL
iNVANDRAREN iNVANDRAREN
iON iON
@ -361,46 +403,64 @@ ITZ
Japhson Japhson
JAVLiU JAVLiU
JCH JCH
jedi
JENC JENC
JFKXVID
JJH JJH
JoLLyRoGeR JoLLyRoGeR
Jozzep
K-F K-F
k2 k2
KaKa KaKa
kamera kamera
KEG
keltz keltz
KiLT
KiNGS KiNGS
kirklestat
KLAXXON KLAXXON
KlockreN KlockreN
KNIGHTY1973
KOENiG KOENiG
Koffe
Kole
KonzillaRG KonzillaRG
KooKoo
KRaLiMaRKo KRaLiMaRKo
Kuth
KYR KYR
Kyuubi Kyuubi
LamB LamB
Larceny Larceny
LCHD LCHD
leetay
LEVERAGE LEVERAGE
LEViTY LEViTY
LiMiTED
LiPAN LiPAN
LMAO LMAO
LMG
LoD LoD
LOL LOL
LOLCATS LOLCATS
LoneWolf LoneWolf
LOST LOST
LP LP
lrc
LRH
LTRG LTRG
LTT LTT
LUSO LUSO
M794 M794
MACHD MACHD
macro macro
madeec
MAGiCAL MAGiCAL
MAGiCViBE MAGiCViBE
MAiN MAiN
MainEvent MainEvent
MARiNES MARiNES
marioBombo
MAXSPEED MAXSPEED
MC MC
MCR MCR
@ -408,12 +468,16 @@ med
MEDiAMANiACS MEDiAMANiACS
MEDiEVAL MEDiEVAL
MELiTE MELiTE
Mental.RG
MeTH MeTH
METiS METiS
MHQ MHQ
Mikoto
MiND MiND
MiNT MiNT
MiRAGETV MiRAGETV
Mish
MissRipZ
MMI MMI
MoF MoF
MOMENTUM MOMENTUM
@ -434,8 +498,11 @@ N-F
NaRB NaRB
Narutoverse Narutoverse
NBS NBS
NDRT
NeDiVx NeDiVx
NEPTUNE
NERDHD NERDHD
NeRoZ
NEW.SOURCE NEW.SOURCE
NewArtRiot NewArtRiot
NFHD NFHD
@ -444,6 +511,7 @@ NGXHD
NhaNc3 NhaNc3
NiBURU NiBURU
NiF NiF
NikonXP
Nile Nile
NiX NiX
NL.Subs NL.Subs
@ -460,14 +528,19 @@ NPW
NSUBS NSUBS
NT NT
NTb NTb
NTF
NuMy
NUXX
NWO NWO
NyTT NyTT
OAS OAS
Occor
OEM OEM
OEM1080 OEM1080
Omifast Omifast
OmU OmU
ONYX ONYX
OPT!V!D
ORC ORC
ORENJi ORENJi
ORPHEUS ORPHEUS
@ -476,9 +549,11 @@ OSiTV
OUTDATED OUTDATED
OZC OZC
P0W4 P0W4
P4DGE
Pa@Ph Pa@Ph
PADDO PADDO
papi papi
PAROVOZ
PARTiCLE PARTiCLE
PaYxXx PaYxXx
PeeWee PeeWee
@ -502,9 +577,11 @@ PoTuS
PP PP
PPQ PPQ
PRECiOUS PRECiOUS
prevail
Prime Prime
PriMeHD PriMeHD
PRiNCE PRiNCE
prithwi
PRoDJi PRoDJi
PROGRESS PROGRESS
PROPHETS PROPHETS
@ -523,16 +600,19 @@ Purana
PURE PURE
PUZZLE PUZZLE
PxHD PxHD
PZE
Q0S Q0S
QCF QCF
QDP QDP
QiX QiX
QSP QSP
Quali.SlaYer
QXE QXE
R&C R&C
rabomil rabomil
RANDi RANDi
RAP RAP
Rare.Share
Razor1911 Razor1911
Reaperza Reaperza
REAVERS REAVERS
@ -541,6 +621,8 @@ REFiNED
RELOADED RELOADED
Republic Republic
REPULSiON REPULSiON
RETRO
Rets
REVEiLLE REVEiLLE
REWARD REWARD
RightSiZE RightSiZE
@ -549,8 +631,10 @@ RiPTATORz
RiTALiX RiTALiX
RiVER RiVER
RMT RMT
RoCK&BlueLadyRG
RoCKRioT RoCKRioT
ROVERS ROVERS
RS
RSG RSG
RTA RTA
RUBY RUBY
@ -568,18 +652,22 @@ SAMFD
SANTI SANTI
SAPHiRE SAPHiRE
Sapphire Sapphire
SATIVA
SChiZO SChiZO
Scratch404 Scratch404
Scratched Scratched
SCREAM
ScWb ScWb
SecretMyth SecretMyth
SECTOR7 SECTOR7
SEMTEX SEMTEX
SEPTiC SEPTiC
SEVcD
SEVENTWENTY SEVENTWENTY
SexSh0p SexSh0p
SFM SFM
SGKK SGKK
Shadow
Shadowman Shadowman
SHAMNBOYZ SHAMNBOYZ
SHDXXX SHDXXX
@ -587,16 +675,22 @@ shortbrehd
SHS SHS
SHUNPO SHUNPO
SiC SiC
sickboy88
SiGHTHD SiGHTHD
SiHD SiHD
SiLU SiLU
SINISTER
SiNNERS SiNNERS
SiRiUs.sHaRe
SiTV SiTV
SKALiWAGZ SKALiWAGZ
SkipTowne
SKYLIGHT
SLM SLM
SLO SLO
SLOMO SLOMO
SMoKeR SMoKeR
Smurfenlars
Sneak Sneak
SNUGGLER SNUGGLER
SoCkS SoCkS
@ -607,13 +701,16 @@ SPARKS
SPOOKY SPOOKY
sprinter sprinter
SSF SSF
STAGEMAN
Stealthmaster Stealthmaster
stieg stieg
stoffinho17
Stranded Stranded
streetwars streetwars
STV STV
Subject16 Subject16
SuBoXoNe SuBoXoNe
SUBZERO
SUNSPOT SUNSPOT
SURFER SURFER
SVD SVD
@ -628,16 +725,22 @@ TASTE
TASTETV TASTETV
TB TB
TDF TDF
TDR
TeamRV
TELEFLiX TELEFLiX
TENEIGHTY TENEIGHTY
TeNNReeD
TERRA TERRA
terribleHD terribleHD
terribleSD terribleSD
TFE
THENiGHTMAREiNHD THENiGHTMAREiNHD
TheWretched TheWretched
Thizz
THOR THOR
THORA THORA
THUGLiNE THUGLiNE
THUNDER
TiDE TiDE
TiMELORDS TiMELORDS
TiMPE TiMPE
@ -653,8 +756,10 @@ TOPAZ
TorrenTGui TorrenTGui
tpz tpz
trentalent trentalent
TRiMEDIA
TRiPS TRiPS
TrollHD TrollHD
trosa
TruCK TruCK
tRuE tRuE
TRUEFRENCH TRUEFRENCH
@ -663,6 +768,7 @@ TsH
tsn tsn
TURKiSO TURKiSO
TUSAHD TUSAHD
Tushar
TVA TVA
TW TW
TWiZTED TWiZTED
@ -676,12 +782,15 @@ USELESS
UVall UVall
VaAr3 VaAr3
VALiOMEDiA VALiOMEDiA
VALKYRiA
VAMPS VAMPS
Vanillapunk Vanillapunk
VanRay VanRay
VCDVaULT VCDVaULT
VeGaN VeGaN
Vegapunk Vegapunk
VeggTeppe
Vex
ViCiOsO ViCiOsO
ViKAT ViKAT
ViNYL ViNYL
@ -712,9 +821,11 @@ WiRE
WLM WLM
WoLF WoLF
Wolky Wolky
WoRKZ
WPi WPi
WRCR WRCR
WuSiWuG WuSiWuG
XanaX
Xander Xander
XiA XiA
XOR XOR
@ -735,4 +846,9 @@ Yibis
YoHo YoHo
YOUFORGOTTOREPACKTHIS YOUFORGOTTOREPACKTHIS
ZBS ZBS
ZEKTORM
ZEN
Zeus.Dias
ZMG ZMG
Zox
Zuzuu

View File

@ -1,18 +1,27 @@
// filebot -script "http://filebot.sf.net/scripts/cleaner.groovy" -trust-script /path/to/media/ // filebot -script "http://filebot.sf.net/scripts/cleaner.groovy" [--action test] /path/to/media/
/* /*
* Delete orphaned "clutter" files like nfo, jpg, etc * Delete orphaned "clutter" files like nfo, jpg, etc and sample files
*/ */
def isClutter(file) { def isClutter(f) {
return file.hasExtension("nfo", "txt", "jpg", "jpeg") f.path =~ /\b(?i:sample|trailer|extras|deleted.scenes|music.video|scrapbook)\b/ || f.hasExtension("jpg", "jpeg", "png", "gif", "nfo", "xml", "htm", "html", "log", "srt", "sub", "idx", "md5", "sfv", "txt", "rtf", "url", "db", "dna")
} }
/*
 * Announce the given file or folder and then remove it.
 * When the script is run with --action test nothing is deleted and false is returned.
 * Otherwise returns the result of the delete call.
 */
def clean(f) {
	println "Delete $f"

	// dry run requested via --action test: report only, leave everything in place
	def dryRun = (_args.action == 'test')
	if (dryRun) {
		return false
	}

	// folders are removed via deleteDir(), plain files via delete()
	if (f.isDirectory()) {
		return f.deleteDir()
	}
	return f.delete()
}
// delete clutter files in orphaned media folders // delete clutter files in orphaned media folders
args.getFiles{ isClutter(it) && !it.dir.hasFile{ it.isVideo() }}.each { args.getFiles{ isClutter(it) && !it.dir.hasFile{ (it.isVideo() || it.isAudio()) && !isClutter(it) }}.each { clean(it) }
println "Delete file $it: " + it.delete()
}
// delete empty folders but exclude roots // delete empty folders but exclude given args
args.getFolders{ it.getFiles().isEmpty() && !args.contains(it) }.each { args.getFolders{ it.listFiles().length == 0 && !args.contains(it) }.each { clean(it) }
println "Delete dir $it: " + it.deleteDir()
}