diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java index 5e6daa92..3de6b311 100644 --- a/source/net/sourceforge/filebot/media/MediaDetection.java +++ b/source/net/sourceforge/filebot/media/MediaDetection.java @@ -336,11 +336,7 @@ public class MediaDetection { names.addAll(matches); // don't allow duplicates - Map unique = new LinkedHashMap(); - for (String it : names) { - unique.put(normalizePunctuation(it).toLowerCase(), it); - } - return new ArrayList(unique.values()); + return getUniqueQuerySet(names); } @@ -503,14 +499,17 @@ public class MediaDetection { Collection terms = new LinkedHashSet(); // 1. term: try to match movie pattern 'name (year)' or use filename as is - terms.add(reduceMovieName(getName(movieFile))); + terms.add(getName(movieFile)); // 2. term: first meaningful parent folder File movieFolder = guessMovieFolder(movieFile); if (movieFolder != null) { - terms.add(reduceMovieName(getName(movieFolder))); + terms.add(getName(movieFolder)); } + // reduce movie names + terms = new LinkedHashSet(reduceMovieNamePermutations(terms)); + List movieNameMatches = matchMovieName(terms, true, 0); if (movieNameMatches.isEmpty()) { movieNameMatches = matchMovieName(terms, strict, 2); @@ -585,12 +584,32 @@ public class MediaDetection { } - public static String reduceMovieName(String name) throws IOException { - Matcher reluctantMatcher = compile("^(.+)[\\[\\(]((?:19|20)\\d{2})[\\]\\)]").matcher(name); - if (reluctantMatcher.find()) { - return String.format("%s %s", reluctantMatcher.group(1).trim(), reluctantMatcher.group(2)); + public static String reduceMovieName(String name, boolean strict) throws IOException { + Matcher matcher = compile(strict ? "^(.+)[\\[\\(]((?:19|20)\\d{2})[\\]\\)]" : "^(.+?)((?:19|20)\\d{2})").matcher(name); + if (matcher.find()) { + return String.format("%s %s", normalizePunctuation(matcher.group(1)), matcher.group(2)); } - return name; + return null; + } + + + public static Collection reduceMovieNamePermutations(Collection terms) throws IOException { + LinkedList names = new LinkedList(); + + for (String it : terms) { + String rn = reduceMovieName(it, true); + if (rn != null) { + names.addFirst(rn); + } else { + names.addLast(it); // unsure, keep original term just in case, but also try non-strict reduce + rn = reduceMovieName(it, false); + if (rn != null) { + names.addLast(rn); + } + } + } + + return names; } @@ -736,12 +755,15 @@ public class MediaDetection { private static Collection queryMovieByFileName(Collection files, MovieIdentificationService queryLookupService, Locale locale) throws Exception { // remove blacklisted terms - Set querySet = new LinkedHashSet(); + List querySet = new ArrayList(); querySet.addAll(stripReleaseInfo(files, true)); querySet.addAll(stripReleaseInfo(files, false)); + // remove duplicates + querySet = getUniqueQuerySet(querySet); + // DEBUG - // System.out.format("%s: %s%n", queryLookupService.getName(), querySet); + // System.out.format("Query %s: %s%n", queryLookupService.getName(), querySet); final SimilarityMetric metric = new NameSimilarityMetric(); final Map probabilityMap = new LinkedHashMap(); @@ -765,6 +787,15 @@ public class MediaDetection { } + private static List getUniqueQuerySet(Collection terms) { + Map unique = new LinkedHashMap(); + for (String it : terms) { + unique.put(normalizePunctuation(it).toLowerCase(), it); + } + return new ArrayList(unique.values()); + } + + public static String stripReleaseInfo(String name) { try { return releaseInfo.cleanRelease(singleton(name), true).iterator().next(); diff --git a/source/net/sourceforge/filebot/media/ReleaseInfo.java b/source/net/sourceforge/filebot/media/ReleaseInfo.java index 4b56ac52..80f78aa8 100644 --- a/source/net/sourceforge/filebot/media/ReleaseInfo.java +++ b/source/net/sourceforge/filebot/media/ReleaseInfo.java @@ -181,7 +181,7 @@ public class ReleaseInfo { public Pattern getLanguageSuffixPattern(Collection languages, boolean strict) { // .en.srt - return compile("(?<=" + (strict ? "[.]" : "[\\p{Punct}\\p{Space}]") + ")(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", (strict ? 0 : CASE_INSENSITIVE) | UNICODE_CASE); + return compile("(?<=[.])(" + join(quoteAll(languages), "|") + ")(?=[._ ]*$)", (strict ? 0 : CASE_INSENSITIVE) | UNICODE_CASE); } diff --git a/source/net/sourceforge/filebot/media/ReleaseInfo.properties b/source/net/sourceforge/filebot/media/ReleaseInfo.properties index 84789bdd..b1fb506c 100644 --- a/source/net/sourceforge/filebot/media/ReleaseInfo.properties +++ b/source/net/sourceforge/filebot/media/ReleaseInfo.properties @@ -2,7 +2,7 @@ pattern.video.source: CAMRip|CAM|PDVD|TS|TELESYNC|PDVD|PPV|PPVRip|Screener|SCR|SCREENER|DVDSCR|DVDSCREENER|BDSCR|R4|R5|R5LINE|R5.LINE|DVD|DVDRip|DVDR|TVRip|DSR|PDTV|SDTV|HDTV|DVB|DVBRip|DTHRip|VODRip|VODR|BDRip|BRRip|BluRay|BDR|BR.Scr|BR.Screener|HDDVD|HDRip|WorkPrint|VHS|VCD|TELECINE|WEB.DL|WEBRip|ithd|iTunesHD # additional release info patterns -pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpg|mpeg|mpeg4|mp3|aac|ac3|dd20|dd51|2ch|6ch|DTS|DTS.HD|DTS.HD.MA|TrueHD|WS|HR|7p|720p|18p|1080p|NTSC +pattern.video.format: DivX|Xvid|AVC|x264|h264|3ivx|mpg|mpeg|mpeg4|mp3|aac|ac3|dd20|dd51|2ch|6ch|TS|DTS|DTS.HD|DTS.HD.MA|TrueHD|WS|HR|7p|720p|18p|1080p|PAL|NTSC # known release group names url.release-groups: http://filebot.net/data/release-groups.txt diff --git a/website/data/query-blacklist.txt b/website/data/query-blacklist.txt index f1541574..8989615c 100644 --- a/website/data/query-blacklist.txt +++ b/website/data/query-blacklist.txt @@ -100,6 +100,7 @@ Channel.4 Channel.5 CLASSIC CN +Custom.DKsubs CVCD DC DD2[.,]0 @@ -112,6 +113,7 @@ ded Demonoid Director's.Cut Directors.Cut +DKsubs DL docu Dual.Audio