From ee3c1d25d77e8b3ffe8a1d968a0259661a89012e Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Tue, 13 Aug 2013 04:48:13 +0000 Subject: [PATCH] * Better support for Name AKA Name YEAR naming scheme @see http://www.filebot.net/forums/viewtopic.php?f=6&t=882 --- .../filebot/media/MediaDetection.java | 408 ++++++++---------- website/data/query-blacklist.txt | 1 + website/data/release-groups.txt | 1 + 3 files changed, 190 insertions(+), 220 deletions(-) diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java index 55e8e3bf..79d0eec1 100644 --- a/source/net/sourceforge/filebot/media/MediaDetection.java +++ b/source/net/sourceforge/filebot/media/MediaDetection.java @@ -1,14 +1,22 @@ - package net.sourceforge.filebot.media; - -import static java.util.Collections.*; -import static java.util.regex.Pattern.*; -import static net.sourceforge.filebot.MediaTypes.*; -import static net.sourceforge.filebot.Settings.*; -import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*; -import static net.sourceforge.filebot.similarity.Normalization.*; -import static net.sourceforge.tuned.FileUtilities.*; +import static java.util.Collections.addAll; +import static java.util.Collections.emptyList; +import static java.util.Collections.reverseOrder; +import static java.util.Collections.singleton; +import static java.util.Collections.sort; +import static java.util.Collections.synchronizedMap; +import static java.util.regex.Pattern.compile; +import static net.sourceforge.filebot.MediaTypes.NFO_FILES; +import static net.sourceforge.filebot.Settings.useExtendedFileAttributes; +import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.getLenientCollator; +import static net.sourceforge.filebot.similarity.Normalization.normalizeBrackets; +import static net.sourceforge.filebot.similarity.Normalization.normalizePunctuation; +import static net.sourceforge.tuned.FileUtilities.filter; +import static net.sourceforge.tuned.FileUtilities.getName; +import static net.sourceforge.tuned.FileUtilities.listFiles; +import static net.sourceforge.tuned.FileUtilities.mapByFolder; +import static net.sourceforge.tuned.FileUtilities.readFile; import java.io.File; import java.io.FileFilter; @@ -62,76 +70,66 @@ import net.sourceforge.filebot.web.SearchResult; import net.sourceforge.filebot.web.TheTVDBClient.SeriesInfo; import net.sourceforge.filebot.web.TheTVDBSearchResult; - public class MediaDetection { - + public static final ReleaseInfo releaseInfo = new ReleaseInfo(); - + private static FileFilter diskFolder; private static FileFilter clutterFile; - - + public static FileFilter getDiskFolderFilter() { if (diskFolder == null) { diskFolder = releaseInfo.getDiskFolderFilter(); } return diskFolder; } - - + public static FileFilter getClutterFileFilter() throws IOException { if (clutterFile == null) { clutterFile = releaseInfo.getClutterFileFilter(); } return clutterFile; } - - + public static boolean isDiskFolder(File folder) { return getDiskFolderFilter().accept(folder); } - - + public static boolean isClutterFile(File file) throws IOException { return getClutterFileFilter().accept(file); } - - + public static boolean isEpisode(String name, boolean strict) { return parseEpisodeNumber(name, strict) != null || parseDate(name) != null; } - - + public static List parseEpisodeNumber(String string, boolean strict) { return new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(string); } - - + public static List parseEpisodeNumber(File file, boolean strict) { return new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(file); } - - + public static Date parseDate(Object object) { return new DateMetric().parse(object); } - - + public static Map, Set> mapSeriesNamesByFiles(Collection files, Locale locale) throws Exception { // map series names by folder Map> seriesNamesByFolder = new HashMap>(); Map> filesByFolder = mapByFolder(files); - + for (Entry> it : filesByFolder.entrySet()) { Set namesForFolder = new TreeSet(getLenientCollator(locale)); namesForFolder.addAll(detectSeriesNames(it.getValue(), locale)); - + seriesNamesByFolder.put(it.getKey(), namesForFolder); } - + // reverse map folders by series name Map> foldersBySeriesName = new HashMap>(); - + for (Set nameSet : seriesNamesByFolder.values()) { for (String name : nameSet) { Set foldersForSeries = new HashSet(); @@ -143,17 +141,17 @@ public class MediaDetection { foldersBySeriesName.put(name, foldersForSeries); } } - + // join both sets Map, Set> batchSets = new HashMap, Set>(); - + while (seriesNamesByFolder.size() > 0) { Set combinedNameSet = new TreeSet(getLenientCollator(locale)); Set combinedFolderSet = new HashSet(); - + // build combined match set combinedFolderSet.add(seriesNamesByFolder.keySet().iterator().next()); - + boolean resolveFurther = true; while (resolveFurther) { boolean modified = false; @@ -165,32 +163,32 @@ public class MediaDetection { } resolveFurther &= modified; } - + // build result entry Set combinedFileSet = new TreeSet(); for (File folder : combinedFolderSet) { combinedFileSet.addAll(filesByFolder.get(folder)); } - + if (combinedFileSet.size() > 0) { // divide file set per complete series set Map> filesByEpisode = new LinkedHashMap>(); for (File file : combinedFileSet) { Object eid = getEpisodeIdentifier(file.getName(), true); - - // SPECIAL CASE: 101, 201, 202, etc 3-digit SxE pattern + + // SPECIAL CASE: 101, 201, 202, etc 3-digit SxE pattern if (eid == null) { List d3sxe = new SeasonEpisodePattern(null, "(? 0) { eid = d3sxe; } } - + // merge specials into first SxE group if (eid == null) { eid = file; // open new SxE group for each unrecognized file } - + List episodeFiles = filesByEpisode.get(eid); if (episodeFiles == null) { episodeFiles = new ArrayList(); @@ -198,7 +196,7 @@ public class MediaDetection { } episodeFiles.add(file); } - + for (int i = 0; true; i++) { Set series = new LinkedHashSet(); for (List episode : filesByEpisode.values()) { @@ -206,24 +204,24 @@ public class MediaDetection { series.add(episode.get(i)); } } - + if (series.isEmpty()) { break; } - + combinedFileSet.removeAll(series); batchSets.put(series, combinedNameSet); } - + if (combinedFileSet.size() > 0) { batchSets.put(combinedFileSet, combinedNameSet); } } - + // set folders as accounted for seriesNamesByFolder.keySet().removeAll(combinedFolderSet); } - + // handle files that have not been matched to a batch set yet Set remainingFiles = new HashSet(files); for (Set batch : batchSets.keySet()) { @@ -232,26 +230,24 @@ public class MediaDetection { if (remainingFiles.size() > 0) { batchSets.put(remainingFiles, null); } - + return batchSets; } - - + public static Object getEpisodeIdentifier(CharSequence name, boolean strict) { // check SxE first Object match = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(name); - + // then Date pattern if (match == null) match = new DateMatcher().match(name); - + return match; } - - + public static List detectSeriesNames(Collection files, Locale locale) throws Exception { List names = new ArrayList(); - + // try xattr metadata if enabled if (useExtendedFileAttributes()) { try { @@ -268,7 +264,7 @@ public class MediaDetection { // ignore } } - + // try to detect series name via nfo files try { for (SearchResult it : lookupSeriesNameByInfoFile(files, locale)) { @@ -277,14 +273,14 @@ public class MediaDetection { } catch (Exception e) { Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to lookup info by id: " + e.getMessage(), e); } - + // try to detect series name via known patterns try { names.addAll(matchSeriesByDirectMapping(files)); } catch (Exception e) { Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to match direct mappings: " + e.getMessage(), e); } - + // cross-reference known series names against file structure try { Set folders = new LinkedHashSet(); @@ -294,16 +290,16 @@ public class MediaDetection { (i == 0 ? filenames : folders).add(normalizeBrackets(getName(f))); } } - + // check foldernames first List matches = matchSeriesByName(folders, 0); - + // check all filenames if necessary if (matches.isEmpty()) { matches.addAll(matchSeriesByName(filenames, 0)); matches.addAll(matchSeriesByName(stripReleaseInfo(filenames, false), 0)); } - + // use lenient sub sequence matching only as fallback and try name without spacing logic that may mess up any lookup if (matches.isEmpty()) { // try to narrow down file to series name as best as possible @@ -320,25 +316,25 @@ public class MediaDetection { for (SearchResult it : matchSeriesFromStringWithoutSpacing(stripReleaseInfo(sns, false), true)) { matches.add(it.getName()); } - + // less reliable CWS deep matching matches.addAll(matchSeriesByName(folders, 2)); matches.addAll(matchSeriesByName(filenames, 2)); } - + // pass along only valid terms names.addAll(stripBlacklistedTerms(matches)); } catch (Exception e) { Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to match folder structure: " + e.getMessage(), e); } - + // match common word sequence and clean detected word sequence from unwanted elements Collection matches = new LinkedHashSet(); - + // check CWS matches SeriesNameMatcher snm = new SeriesNameMatcher(locale); matches.addAll(snm.matchAll(files.toArray(new File[files.size()]))); - + // check for known pattern matches for (File f : files) { String sn = snm.matchByEpisodeIdentifier(getName(f.getParentFile())); @@ -346,7 +342,7 @@ public class MediaDetection { matches.add(sn); } } - + try { Collection priorityMatchSet = new LinkedHashSet(); priorityMatchSet.addAll(stripReleaseInfo(matches, true)); @@ -356,16 +352,15 @@ public class MediaDetection { Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to clean matches: " + e.getMessage(), e); } names.addAll(matches); - + // don't allow duplicates return getUniqueQuerySet(names); } - - + public static List matchSeriesByDirectMapping(Collection files) throws Exception { Map seriesDirectMappings = releaseInfo.getSeriesDirectMappings(); List matches = new ArrayList(); - + for (File file : files) { for (Entry it : seriesDirectMappings.entrySet()) { if (it.getKey().matcher(getName(file)).find()) { @@ -373,13 +368,12 @@ public class MediaDetection { } } } - + return matches; } - + private static List> seriesIndex = new ArrayList>(75000); - - + public static synchronized List> getSeriesIndex() throws IOException { if (seriesIndex.isEmpty()) { try { @@ -398,15 +392,14 @@ public class MediaDetection { return emptyList(); } } - + return seriesIndex; } - - + public static List matchSeriesByName(Collection names, int maxStartIndex) throws Exception { HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex); List matches = new ArrayList(); - + for (String name : names) { String bestMatch = ""; for (Entry it : getSeriesIndex()) { @@ -420,24 +413,23 @@ public class MediaDetection { matches.add(bestMatch); } } - + // sort by length of name match (descending) sort(matches, new Comparator() { - + @Override public int compare(String a, String b) { return Integer.valueOf(b.length()).compareTo(Integer.valueOf(a.length())); } }); - + return matches; } - - + public static List matchSeriesFromStringWithoutSpacing(Collection names, boolean strict) throws IOException { // clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+"); - + List terms = new ArrayList(names.size()); for (String it : names) { String term = spacing.matcher(it).replaceAll("").toLowerCase(); @@ -445,11 +437,11 @@ public class MediaDetection { terms.add(term); // only consider words, not just random letters } } - + // similarity threshold based on strict/non-strict SimilarityMetric metric = new NameSimilarityMetric(); float similarityThreshold = strict ? 0.75f : 0.5f; - + List seriesList = new ArrayList(); for (Entry it : getSeriesIndex()) { String name = spacing.matcher(it.getKey()).replaceAll("").toLowerCase(); @@ -464,11 +456,10 @@ public class MediaDetection { } return seriesList; } - - + public static Collection detectMovie(File movieFile, MovieIdentificationService hashLookupService, MovieIdentificationService queryLookupService, Locale locale, boolean strict) throws Exception { Set options = new LinkedHashSet(); - + // try xattr metadata if enabled if (useExtendedFileAttributes()) { try { @@ -485,7 +476,7 @@ public class MediaDetection { // ignore } } - + // lookup by file hash if (hashLookupService != null && movieFile.isFile()) { try { @@ -498,7 +489,7 @@ public class MediaDetection { Logger.getLogger(MediaDetection.class.getName()).log(Level.WARNING, hashLookupService.getName() + ": " + e.getMessage()); } } - + // lookup by id from nfo file if (queryLookupService != null) { for (int imdbid : grepImdbId(movieFile.getPath())) { @@ -507,7 +498,7 @@ public class MediaDetection { options.add(movie); } } - + // try to grep imdb id from nfo files for (int imdbid : grepImdbIdFor(movieFile)) { Movie movie = queryLookupService.getMovieDescriptor(imdbid, locale); @@ -516,33 +507,33 @@ public class MediaDetection { } } } - + // search by file name or folder name Collection terms = new LinkedHashSet(); - + // 1. term: try to match movie pattern 'name (year)' or use filename as is terms.add(getName(movieFile)); - + // 2. term: first meaningful parent folder File movieFolder = guessMovieFolder(movieFile); if (movieFolder != null) { terms.add(getName(movieFolder)); } - + // reduce movie names terms = new LinkedHashSet(reduceMovieNamePermutations(terms)); - + List movieNameMatches = matchMovieName(terms, true, 0); if (movieNameMatches.isEmpty()) { movieNameMatches = matchMovieName(terms, strict, 2); } - + // skip further queries if collected matches are already sufficient if (options.size() > 0 && movieNameMatches.size() > 0) { options.addAll(movieNameMatches); return sortBySimilarity(options, terms); } - + // if matching name+year failed, try matching only by name if (movieNameMatches.isEmpty() && strict) { movieNameMatches = matchMovieName(terms, false, 0); @@ -550,52 +541,53 @@ public class MediaDetection { movieNameMatches = matchMovieName(terms, false, 2); } } - + // assume name without spacing will mess up any lookup if (movieNameMatches.isEmpty()) { movieNameMatches = matchMovieFromStringWithoutSpacing(terms, strict); - + if (movieNameMatches.isEmpty() && !terms.equals(stripReleaseInfo(terms, true))) { movieNameMatches = matchMovieFromStringWithoutSpacing(stripReleaseInfo(terms, true), strict); } } - + // query by file / folder name if (queryLookupService != null) { Collection results = queryMovieByFileName(terms, queryLookupService, locale); - + // try query without year as it sometimes messes up results if years don't match properly (movie release years vs dvd release year, etc) if (results.isEmpty() && !strict) { - List termsWithoutYear = new ArrayList(); + List lastResortQueryList = new ArrayList(); Pattern yearPattern = Pattern.compile("(?:19|20)\\d{2}"); + Pattern akaPattern = Pattern.compile("\\bAKA\\b", Pattern.CASE_INSENSITIVE); for (String term : terms) { - Matcher m = yearPattern.matcher(term); - if (m.find()) { - termsWithoutYear.add(m.replaceAll("").trim()); + if (yearPattern.matcher(term).find() || akaPattern.matcher(term).find()) { + // try to separate AKA titles as well into separate searches + for (String mn : akaPattern.split(yearPattern.matcher(term).replaceAll(""))) { + lastResortQueryList.add(mn.trim()); + } } } - if (termsWithoutYear.size() > 0) { - results = queryMovieByFileName(termsWithoutYear, queryLookupService, locale); + if (lastResortQueryList.size() > 0) { + results = queryMovieByFileName(lastResortQueryList, queryLookupService, locale); } } - + options.addAll(results); } - + // add local matching after online search options.addAll(movieNameMatches); - + // sort by relevance return sortBySimilarity(options, terms); } - - + public static SimilarityMetric getMovieMatchMetric() { return new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric(), new SequenceMatchSimilarity(0, true), new NumericSimilarityMetric() { - + private Pattern year = Pattern.compile("\\b\\d{4}\\b"); - - + @Override protected String normalize(Object object) { Matcher ym = year.matcher(object.toString()); @@ -605,28 +597,25 @@ public class MediaDetection { } return sb.toString().trim(); } - - + @Override public float getSimilarity(Object o1, Object o2) { return super.getSimilarity(o1, o2) * 2; // DOUBLE WEIGHT FOR YEAR MATCH } }); } - - + public static List sortBySimilarity(Collection options, Collection terms) throws IOException { List paragon = stripReleaseInfo(terms, true); List sorted = new ArrayList(options); sort(sorted, new SimilarityComparator(getMovieMatchMetric(), paragon.toArray())); - + // DEBUG // System.out.format("sortBySimilarity %s => %s", terms, options); - + return sorted; } - - + public static String reduceMovieName(String name, boolean strict) throws IOException { Matcher matcher = compile(strict ? "^(.+)[\\[\\(]((?:19|20)\\d{2})[\\]\\)]" : "^(.+?)((?:19|20)\\d{2})").matcher(name); if (matcher.find()) { @@ -634,11 +623,10 @@ public class MediaDetection { } return null; } - - + public static Collection reduceMovieNamePermutations(Collection terms) throws IOException { LinkedList names = new LinkedList(); - + for (String it : terms) { String rn = reduceMovieName(it, true); if (rn != null) { @@ -651,36 +639,34 @@ public class MediaDetection { } } } - + return names; } - - + public static File guessMovieFolder(File movieFile) throws Exception { File folder = guessMovieFolderWithoutSanity(movieFile); - + // perform sanity checks if (folder == null || folder.getName().isEmpty() || folder.equals(new File(System.getProperty("user.home")))) { return null; } - + return folder; } - - + private static File guessMovieFolderWithoutSanity(File movieFile) throws Exception { - // special case for folder mode + // special case for folder mode if (movieFile.isDirectory()) { File f = movieFile; - - // check for double nested structures + + // check for double nested structures if (checkMovie(f.getParentFile(), false) != null && checkMovie(f, false) == null) { return f.getParentFile(); } else { return f; } } - + // first parent folder that matches a movie (max 3 levels deep) for (boolean strictness : new boolean[] { true, false }) { File f = movieFile.getParentFile(); @@ -691,13 +677,13 @@ public class MediaDetection { } } } - + // otherwise try the first potentially meaningful parent folder (max 2 levels deep) File f = movieFile.getParentFile(); for (int i = 0; f != null && i < 2; f = f.getParentFile(), i++) { String term = stripReleaseInfo(f.getName()); if (term.length() > 0) { - // check for double nested structures + // check for double nested structures if (checkMovie(f.getParentFile(), false) != null && checkMovie(f, false) == null) { return f.getParentFile(); } else { @@ -705,22 +691,20 @@ public class MediaDetection { } } } - + if (movieFile.getParentFile() != null && stripReleaseInfo(movieFile.getParentFile().getName()).length() > 0) { return movieFile.getParentFile(); } return null; } - - + public static Movie checkMovie(File file, boolean strict) throws Exception { List matches = file != null ? matchMovieName(singleton(file.getName()), strict, 4) : null; return matches != null && matches.size() > 0 ? matches.get(0) : null; } - + private static List> movieIndex = new ArrayList>(100000); - - + public static synchronized List> getMovieIndex() throws IOException { if (movieIndex.isEmpty()) { try { @@ -733,16 +717,15 @@ public class MediaDetection { return emptyList(); } } - + return movieIndex; } - - + public static List matchMovieName(final Collection files, boolean strict, int maxStartIndex) throws Exception { // cross-reference file / folder name with movie list final HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex); final Map matchMap = new HashMap(); - + for (Entry movie : getMovieIndex()) { for (String name : files) { String movieIdentifier = movie.getKey(); @@ -760,25 +743,24 @@ public class MediaDetection { } } } - + // sort by length of name match (descending) List results = new ArrayList(matchMap.keySet()); sort(results, new Comparator() { - + @Override public int compare(Movie a, Movie b) { return Integer.valueOf(matchMap.get(b).length()).compareTo(Integer.valueOf(matchMap.get(a).length())); } }); - + return results; } - - + public static List matchMovieFromStringWithoutSpacing(Collection names, boolean strict) throws IOException { // clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+"); - + List terms = new ArrayList(names.size()); for (String it : names) { String term = spacing.matcher(it).replaceAll("").toLowerCase(); @@ -786,11 +768,11 @@ public class MediaDetection { terms.add(term); // only consider words, not just random letters } } - + // similarity threshold based on strict/non-strict SimilarityMetric metric = new NameSimilarityMetric(); float similarityThreshold = strict ? 0.9f : 0.5f; - + LinkedList movies = new LinkedList(); for (Entry it : getMovieIndex()) { String name = spacing.matcher(it.getKey()).replaceAll("").toLowerCase(); @@ -806,23 +788,22 @@ public class MediaDetection { } } } - + return new ArrayList(movies); } - - + private static Collection queryMovieByFileName(Collection files, MovieIdentificationService queryLookupService, Locale locale) throws Exception { // remove blacklisted terms List querySet = new ArrayList(); querySet.addAll(stripReleaseInfo(files, true)); querySet.addAll(stripReleaseInfo(files, false)); - + // remove duplicates querySet = getUniqueQuerySet(stripBlacklistedTerms(querySet)); - + // DEBUG // System.out.format("Query %s: %s%n", queryLookupService.getName(), querySet); - + final Map probabilityMap = new LinkedHashMap(); final SimilarityMetric metric = getMovieMatchMetric(); for (String query : querySet) { @@ -830,30 +811,30 @@ public class MediaDetection { probabilityMap.put(movie, metric.getSimilarity(query, movie)); } } - + // sort by similarity to original query (descending) List results = new ArrayList(probabilityMap.keySet()); sort(results, new Comparator() { - + @Override public int compare(Movie a, Movie b) { return probabilityMap.get(b).compareTo(probabilityMap.get(a)); } }); - + return results; } - - + private static List getUniqueQuerySet(Collection terms) { Map unique = new LinkedHashMap(); for (String it : terms) { - unique.put(normalizePunctuation(it).toLowerCase(), it); + if (it.length() > 0) { + unique.put(normalizePunctuation(it).toLowerCase(), it); + } } return new ArrayList(unique.values()); } - - + public static String stripReleaseInfo(String name) { try { return releaseInfo.cleanRelease(singleton(name), true).iterator().next(); @@ -863,13 +844,11 @@ public class MediaDetection { throw new RuntimeException(e); } } - - + public static List stripReleaseInfo(Collection names, boolean strict) throws IOException { return releaseInfo.cleanRelease(names, strict); } - - + public static List stripBlacklistedTerms(Collection names) throws IOException { Pattern blacklist = releaseInfo.getBlacklistPattern(); List acceptables = new ArrayList(names.size()); @@ -880,8 +859,7 @@ public class MediaDetection { } return acceptables; } - - + public static Set grepImdbIdFor(File file) throws Exception { Set collection = new LinkedHashSet(); List nfoFiles = new ArrayList(); @@ -890,7 +868,7 @@ public class MediaDetection { } else if (file.getParentFile().isDirectory()) { addAll(nfoFiles, file.getParentFile().listFiles(NFO_FILES)); } - + // parse ids from nfo files for (File nfo : nfoFiles) { try { @@ -900,14 +878,13 @@ public class MediaDetection { Logger.getLogger(MediaDetection.class.getClass().getName()).log(Level.WARNING, "Failed to read nfo: " + e.getMessage()); } } - + return collection; } - - + public static Set lookupSeriesNameByInfoFile(Collection files, Locale language) throws Exception { Set names = new LinkedHashSet(); - + SortedSet folders = new TreeSet(reverseOrder()); for (File f : files) { for (int i = 0; i < 2 && f.getParentFile() != null; i++) { @@ -915,22 +892,22 @@ public class MediaDetection { folders.add(f); } } - + // search for id in sibling nfo files for (File folder : folders) { if (!folder.exists()) continue; - + for (File nfo : folder.listFiles(NFO_FILES)) { String text = new String(readFile(nfo), "UTF-8"); - + for (int imdbid : grepImdbId(text)) { TheTVDBSearchResult series = WebServices.TheTVDB.lookupByIMDbID(imdbid, language); if (series != null) { names.add(series); } } - + for (int tvdbid : grepTheTvdbId(text)) { TheTVDBSearchResult series = WebServices.TheTVDB.lookupByID(tvdbid, language); if (series != null) { @@ -939,24 +916,22 @@ public class MediaDetection { } } } - + return names; } - - + public static Set grepImdbId(CharSequence text) { // scan for imdb id patterns like tt1234567 Matcher imdbMatch = Pattern.compile("(?<=tt)\\d{7}").matcher(text); Set collection = new LinkedHashSet(); - + while (imdbMatch.find()) { collection.add(Integer.parseInt(imdbMatch.group())); } - + return collection; } - - + public static Set grepTheTvdbId(CharSequence text) { // scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14 Set collection = new LinkedHashSet(); @@ -973,36 +948,31 @@ public class MediaDetection { // parse for thetvdb urls, ignore everything else } } - + return collection; } - - + public static Movie grepMovie(File nfo, MovieIdentificationService resolver, Locale locale) throws Exception { return resolver.getMovieDescriptor(grepImdbId(new String(readFile(nfo), "UTF-8")).iterator().next(), locale); } - - + public static SeriesInfo grepSeries(File nfo, Locale locale) throws Exception { return WebServices.TheTVDB.getSeriesInfoByID(grepTheTvdbId(new String(readFile(nfo), "UTF-8")).iterator().next(), locale); } - - + /* * Heavy-duty name matcher used for matching a file to or more movies (out of a list of ~50k) */ private static class HighPerformanceMatcher extends CommonSequenceMatcher { - + private static final Collator collator = getLenientCollator(Locale.ENGLISH); - + private static final Map transformCache = synchronizedMap(new HashMap(65536)); - - + public HighPerformanceMatcher(int maxStartIndex) { super(collator, maxStartIndex, true); } - - + @Override protected CollationKey[] split(String sequence) { CollationKey[] value = transformCache.get(sequence); @@ -1012,19 +982,17 @@ public class MediaDetection { } return value; } - - + public String normalize(String sequence) { return normalizePunctuation(sequence); // only normalize punctuation, make sure we keep the year (important for movie matching) } } - - + public static void storeMetaInfo(File file, Object model) { // only for Episode / Movie objects if ((model instanceof Episode || model instanceof Movie) && file.exists()) { MetaAttributes xattr = new MetaAttributes(file); - + // set creation date to episode / movie release date try { if (model instanceof Episode) { @@ -1041,8 +1009,8 @@ public class MediaDetection { } catch (Exception e) { Logger.getLogger(MediaDetection.class.getClass().getName()).warning("Failed to set creation date: " + e.getMessage()); } - - // store original name and model as xattr + + // store original name and model as xattr try { if (xattr.getOriginalName() == null) { xattr.setOriginalName(file.getName()); @@ -1051,7 +1019,7 @@ public class MediaDetection { } catch (Exception e) { Logger.getLogger(MediaDetection.class.getClass().getName()).warning("Failed to set xattr: " + e.getMessage()); } - + } } } diff --git a/website/data/query-blacklist.txt b/website/data/query-blacklist.txt index d7fd3f71..a3a47347 100644 --- a/website/data/query-blacklist.txt +++ b/website/data/query-blacklist.txt @@ -9,6 +9,7 @@ [1-9].?of.?[1-9] \bThe$ \d{2,4}.\b\d{2}.\b\d{2}.\b\d{2}.\b\d{2} +\d{3,4}[pi] ^(TV.)?(Show|Serie)[s]? ^[0-9]{1,2}[.] ^[A-Z0-9]$ diff --git a/website/data/release-groups.txt b/website/data/release-groups.txt index 1bce29d3..9ebdcd3e 100644 --- a/website/data/release-groups.txt +++ b/website/data/release-groups.txt @@ -1368,6 +1368,7 @@ NEPTUNE NERDHD NeRoZ NES +NEUTRINO NEW.SOURCE NewArtRiot NewSubs