* fine-tune query clean-up => improved movie matching

This commit is contained in:
Reinhard Pointner 2012-11-22 16:45:40 +00:00
parent ea9cc3bca7
commit ee9d14651d
2 changed files with 9 additions and 5 deletions

View File

@ -69,7 +69,7 @@ public class ReleaseInfo {
// match locale identifier and lookup Locale object
Map<String, Locale> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault());
String lang = matchLast(getLanguageSuffixPattern(languages.keySet()), null, name);
String lang = matchLast(getLanguageSuffixPattern(languages.keySet(), false), null, name);
if (lang == null)
return null;
@ -121,7 +121,7 @@ public class ReleaseInfo {
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
Pattern clutterBracket = getClutterBracketPattern(strict);
Pattern releaseGroup = getReleaseGroupPattern(strict);
Pattern languageSuffix = getLanguageSuffixPattern(languages);
Pattern languageSuffix = getLanguageSuffixPattern(languages, strict);
Pattern languageTag = getLanguageTagPattern(languages);
Pattern videoSource = getVideoSourcePattern();
Pattern videoFormat = getVideoFormatPattern();
@ -181,9 +181,9 @@ public class ReleaseInfo {
}
public Pattern getLanguageSuffixPattern(Collection<String> languages) {
public Pattern getLanguageSuffixPattern(Collection<String> languages, boolean strict) {
// .en.srt
return compile("(?<=[\\p{Punct}\\p{Space}])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", CASE_INSENSITIVE | UNICODE_CASE);
return compile("(?<=" + (strict ? "[.]" : "[\\p{Punct}\\p{Space}]") + ")(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", (strict ? 0 : CASE_INSENSITIVE) | UNICODE_CASE);
}
@ -422,7 +422,7 @@ public class ReleaseInfo {
for (Locale language : new HashSet<Locale>(asList(supportedDisplayLocale))) {
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD);
languageMap.put(languageName, locale);
languageMap.put(languageName.toLowerCase(), locale);
}
}

View File

@ -343,6 +343,7 @@ DCP
DDC
dddc
DDR
DeadFish
DEAL
DeBCz
DECADE
@ -880,6 +881,7 @@ LTRG
LTT
LTU
LU3UR
Lum1x
LUSO
M794
MACHD
@ -1009,6 +1011,7 @@ NuMy
NUXX
NVA
NWO
NWTC
NYDIC
NyTT
O2
@ -1111,6 +1114,7 @@ PSYCHD
PTBR
Pti
PtP
PTpOWeR
PtS
Pudding
PUKKA