* various movie detection refinements to address issues with naming patterns like the following

e.g.
The.Croods.2013.Custom.DKsubs.TS.PAL.DVDR-SUBLiME/sublime-thecroods_ts.iso
This commit is contained in:
Reinhard Pointner 2013-04-08 05:29:12 +00:00
parent ed30ae0159
commit 02f714b6ab
4 changed files with 49 additions and 16 deletions

View File

@ -336,11 +336,7 @@ public class MediaDetection {
names.addAll(matches);
// don't allow duplicates
Map<String, String> unique = new LinkedHashMap<String, String>();
for (String it : names) {
unique.put(normalizePunctuation(it).toLowerCase(), it);
}
return new ArrayList<String>(unique.values());
return getUniqueQuerySet(names);
}
@ -503,14 +499,17 @@ public class MediaDetection {
Collection<String> terms = new LinkedHashSet<String>();
// 1. term: try to match movie pattern 'name (year)' or use filename as is
terms.add(reduceMovieName(getName(movieFile)));
terms.add(getName(movieFile));
// 2. term: first meaningful parent folder
File movieFolder = guessMovieFolder(movieFile);
if (movieFolder != null) {
terms.add(reduceMovieName(getName(movieFolder)));
terms.add(getName(movieFolder));
}
// reduce movie names
terms = new LinkedHashSet<String>(reduceMovieNamePermutations(terms));
List<Movie> movieNameMatches = matchMovieName(terms, true, 0);
if (movieNameMatches.isEmpty()) {
movieNameMatches = matchMovieName(terms, strict, 2);
@ -585,12 +584,32 @@ public class MediaDetection {
}
public static String reduceMovieName(String name) throws IOException {
Matcher reluctantMatcher = compile("^(.+)[\\[\\(]((?:19|20)\\d{2})[\\]\\)]").matcher(name);
if (reluctantMatcher.find()) {
return String.format("%s %s", reluctantMatcher.group(1).trim(), reluctantMatcher.group(2));
public static String reduceMovieName(String name, boolean strict) throws IOException {
Matcher matcher = compile(strict ? "^(.+)[\\[\\(]((?:19|20)\\d{2})[\\]\\)]" : "^(.+?)((?:19|20)\\d{2})").matcher(name);
if (matcher.find()) {
return String.format("%s %s", normalizePunctuation(matcher.group(1)), matcher.group(2));
}
return name;
return null;
}
/**
 * Builds the list of movie name candidates for the given raw terms.
 *
 * For each term a strict reduction is attempted first. A successful strict
 * reduction is placed at the front of the result. Otherwise the original term
 * is appended to the end of the result, followed by its non-strict reduction
 * if there is one.
 *
 * @param terms raw terms to reduce
 * @return reduced names and fallback terms, in the order described above
 * @throws IOException declared by this method and by the reduceMovieName calls it makes
 */
public static Collection<String> reduceMovieNamePermutations(Collection<String> terms) throws IOException {
	LinkedList<String> candidates = new LinkedList<String>();

	for (String term : terms) {
		String strictName = reduceMovieName(term, true);
		if (strictName != null) {
			// strict match: goes to the head of the list
			candidates.addFirst(strictName);
			continue;
		}

		// unsure: keep the original term just in case ...
		candidates.addLast(term);

		// ... and also try the non-strict reduction
		String lenientName = reduceMovieName(term, false);
		if (lenientName != null) {
			candidates.addLast(lenientName);
		}
	}

	return candidates;
}
@ -736,12 +755,15 @@ public class MediaDetection {
private static Collection<Movie> queryMovieByFileName(Collection<String> files, MovieIdentificationService queryLookupService, Locale locale) throws Exception {
// remove blacklisted terms
Set<String> querySet = new LinkedHashSet<String>();
List<String> querySet = new ArrayList<String>();
querySet.addAll(stripReleaseInfo(files, true));
querySet.addAll(stripReleaseInfo(files, false));
// remove duplicates
querySet = getUniqueQuerySet(querySet);
// DEBUG
// System.out.format("%s: %s%n", queryLookupService.getName(), querySet);
// System.out.format("Query %s: %s%n", queryLookupService.getName(), querySet);
final SimilarityMetric metric = new NameSimilarityMetric();
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
@ -765,6 +787,15 @@ public class MediaDetection {
}
/**
 * Removes duplicate query terms. Two terms count as duplicates when they are
 * equal after normalizePunctuation and lower-casing.
 *
 * The result keeps the position of the first occurrence of each key, while the
 * stored value is the last term seen for that key (a LinkedHashMap does not
 * change insertion order when an existing key is put again).
 *
 * @param terms query terms, possibly containing duplicates
 * @return a new list containing one term per distinct key
 */
private static List<String> getUniqueQuerySet(Collection<String> terms) {
	Map<String, String> termByKey = new LinkedHashMap<String, String>();

	for (String term : terms) {
		String key = normalizePunctuation(term).toLowerCase();
		termByKey.put(key, term);
	}

	List<String> uniqueTerms = new ArrayList<String>(termByKey.size());
	uniqueTerms.addAll(termByKey.values());
	return uniqueTerms;
}
public static String stripReleaseInfo(String name) {
try {
return releaseInfo.cleanRelease(singleton(name), true).iterator().next();

View File

@ -181,7 +181,7 @@ public class ReleaseInfo {
public Pattern getLanguageSuffixPattern(Collection<String> languages, boolean strict) {
// .en.srt
return compile("(?<=" + (strict ? "[.]" : "[\\p{Punct}\\p{Space}]") + ")(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", (strict ? 0 : CASE_INSENSITIVE) | UNICODE_CASE);
return compile("(?<=[.])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", (strict ? 0 : CASE_INSENSITIVE) | UNICODE_CASE);
}

View File

@ -2,7 +2,7 @@
pattern.video.source: CAMRip|CAM|PDVD|TS|TELESYNC|PDVD|PPV|PPVRip|Screener|SCR|SCREENER|DVDSCR|DVDSCREENER|BDSCR|R4|R5|R5LINE|R5.LINE|DVD|DVDRip|DVDR|TVRip|DSR|PDTV|SDTV|HDTV|DVB|DVBRip|DTHRip|VODRip|VODR|BDRip|BRRip|BluRay|BDR|BR.Scr|BR.Screener|HDDVD|HDRip|WorkPrint|VHS|VCD|TELECINE|WEB.DL|WEBRip|ithd|iTunesHD
# additional release info patterns
pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpg|mpeg|mpeg4|mp3|aac|ac3|dd20|dd51|2ch|6ch|DTS|DTS.HD|DTS.HD.MA|TrueHD|WS|HR|7p|720p|18p|1080p|NTSC
pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpg|mpeg|mpeg4|mp3|aac|ac3|dd20|dd51|2ch|6ch|TS|DTS|DTS.HD|DTS.HD.MA|TrueHD|WS|HR|7p|720p|18p|1080p|PAL|NTSC
# known release group names
url.release-groups: http://filebot.net/data/release-groups.txt

View File

@ -100,6 +100,7 @@ Channel.4
Channel.5
CLASSIC
CN
Custom.DKsubs
CVCD
DC
DD2[.,]0
@ -112,6 +113,7 @@ ded
Demonoid
Director's.Cut
Directors.Cut
DKsubs
DL
docu
Dual.Audio