Don't add an alias title with the leading "The"/"A" stripped if what remains is a single word (e.g. keep "The Voice" instead of adding "Voice")

This commit is contained in:
Reinhard Pointner 2016-03-12 17:02:13 +00:00
parent 0084f7e265
commit a4a46cc65a
1 changed file with 11 additions and 6 deletions

View File

@@ -85,14 +85,19 @@ def isValidMovieName(s) {
def getNamePermutations(names) {
def normalize = { s -> s.toLowerCase().normalizePunctuation() }.memoize()
def fn1 = { s -> def n = s.replaceAll(/(?i)(^(The|A)\s)|([,]\s(The|A)$)/, ''); s =~ /^(?i:The|A)/ && n ==~ /\w+/ ? s : n } // e.g. The Walking Dead => Walking Dead, The Voice => The Voice
def fn2 = { s -> s.replaceAll(/\s&\s/, ' and ') }
def fn3 = { s -> s.replaceAll(/\([^\)]*\)$/, '') }
def out = names*.trim().unique().collectMany{ original ->
def simplified = original
[fn1, fn2, fn3].each{ fn -> simplified = fn(simplified).trim() }
return [original, simplified]
def s = original.trim()
s = s.replaceAll(/([,]\s(The|A)$)/, '').trim()
s = s.replaceAll(/\s&\s/, ' and ')
s = s.replaceAll(/\s\([^\)]*\)$/, '').trim()
// e.g. The Walking Dead => Walking Dead, The Voice => The Voice
def sn = s.replaceAll(/^(?i:The|A)\s/, '').trim()
if (sn ==~ /\w+/) {
return [original, s]
}
return [original, sn]
}.unique{ normalize(it) }.findAll{ it.length() > 0 }
out = out.findAll{ it.length() >= 2 && !(it ==~ /[1][0-9][1-9]/) && !(it =~ /^[a-z]/) && it =~ /^[@.\p{L}\p{Digit}]/ } // MUST START WITH UNICODE LETTER