* reduce index size by removing entries with redundant substring sequences
This commit is contained in:
parent
1ce479e858
commit
56253dfb7a
@ -64,7 +64,8 @@ def isValidMovieName(s) {
|
||||
}
|
||||
|
||||
def getNamePermutations(names) {
|
||||
def fn1 = { s -> s.replaceAll(/^(?i)(The|A)\s/, '') }
|
||||
def normalize = { s -> s.toLowerCase().normalizePunctuation() }.memoize()
|
||||
def fn1 = { s -> s.replaceAll(/(?i)(^(The|A)\s)|([,]\s(The|A)$)/, '') }
|
||||
def fn2 = { s -> s.replaceAll(/\s&\s/, ' and ') }
|
||||
def fn3 = { s -> s.replaceAll(/\([^\)]*\)$/, '') }
|
||||
|
||||
@ -72,12 +73,13 @@ def getNamePermutations(names) {
|
||||
def simplified = original
|
||||
[fn1, fn2, fn3].each{ fn -> simplified = fn(simplified).trim() }
|
||||
return [original, simplified]
|
||||
}.unique().toList()
|
||||
}.unique{ normalize(it) }.findAll{ it.length() > 0 }
|
||||
|
||||
out = out.findAll{ it.length() >= 2 && !(it ==~ /[1][0-9][1-9]/) && !(it =~ /^[a-z]/) && it =~ /^[@.\p{L}\p{Digit}]/ } // MUST START WITH UNICODE LETTER
|
||||
out = out.findAll{ !MediaDetection.releaseInfo.structureRootPattern.matcher(it).matches() } // IGNORE NAMES THAT OVERLAP WITH MEDIA FOLDER NAMES
|
||||
out = out.findAll{ a -> names.take(1).contains(a) || out.findAll{ b -> normalize(a).startsWith(normalize(b) + ' ') }.size() == 0 } // TRY TO EXCLUDE REDUNDANT SUBSTRING DUPLICATES
|
||||
|
||||
return out.unique{ it.toLowerCase().normalizePunctuation() }.findAll{ it.length() > 0 }
|
||||
return out
|
||||
}
|
||||
|
||||
def treeSort(list, keyFunction) {
|
||||
|
Loading…
Reference in New Issue
Block a user