* Better support for Name AKA Name YEAR naming scheme

@see http://www.filebot.net/forums/viewtopic.php?f=6&t=882
This commit is contained in:
Reinhard Pointner 2013-08-13 04:48:13 +00:00
parent e4fdcb1884
commit ee3c1d25d7
3 changed files with 190 additions and 220 deletions

View File

@ -1,14 +1,22 @@
package net.sourceforge.filebot.media;
import static java.util.Collections.*;
import static java.util.regex.Pattern.*;
import static net.sourceforge.filebot.MediaTypes.*;
import static net.sourceforge.filebot.Settings.*;
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
import static net.sourceforge.filebot.similarity.Normalization.*;
import static net.sourceforge.tuned.FileUtilities.*;
import static java.util.Collections.addAll;
import static java.util.Collections.emptyList;
import static java.util.Collections.reverseOrder;
import static java.util.Collections.singleton;
import static java.util.Collections.sort;
import static java.util.Collections.synchronizedMap;
import static java.util.regex.Pattern.compile;
import static net.sourceforge.filebot.MediaTypes.NFO_FILES;
import static net.sourceforge.filebot.Settings.useExtendedFileAttributes;
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.getLenientCollator;
import static net.sourceforge.filebot.similarity.Normalization.normalizeBrackets;
import static net.sourceforge.filebot.similarity.Normalization.normalizePunctuation;
import static net.sourceforge.tuned.FileUtilities.filter;
import static net.sourceforge.tuned.FileUtilities.getName;
import static net.sourceforge.tuned.FileUtilities.listFiles;
import static net.sourceforge.tuned.FileUtilities.mapByFolder;
import static net.sourceforge.tuned.FileUtilities.readFile;
import java.io.File;
import java.io.FileFilter;
@ -62,7 +70,6 @@ import net.sourceforge.filebot.web.SearchResult;
import net.sourceforge.filebot.web.TheTVDBClient.SeriesInfo;
import net.sourceforge.filebot.web.TheTVDBSearchResult;
public class MediaDetection {
public static final ReleaseInfo releaseInfo = new ReleaseInfo();
@ -70,7 +77,6 @@ public class MediaDetection {
private static FileFilter diskFolder;
private static FileFilter clutterFile;
public static FileFilter getDiskFolderFilter() {
if (diskFolder == null) {
diskFolder = releaseInfo.getDiskFolderFilter();
@ -78,7 +84,6 @@ public class MediaDetection {
return diskFolder;
}
public static FileFilter getClutterFileFilter() throws IOException {
if (clutterFile == null) {
clutterFile = releaseInfo.getClutterFileFilter();
@ -86,37 +91,30 @@ public class MediaDetection {
return clutterFile;
}
/**
 * Checks the given folder against the filter returned by {@link #getDiskFolderFilter()}.
 *
 * @param folder the folder to test
 * @return whatever the disk folder filter's {@code accept} returns for {@code folder}
 */
public static boolean isDiskFolder(File folder) {
	FileFilter filter = getDiskFolderFilter();
	return filter.accept(folder);
}
/**
 * Checks the given file against the filter returned by {@link #getClutterFileFilter()}.
 *
 * @param file the file to test
 * @return whatever the clutter file filter's {@code accept} returns for {@code file}
 * @throws IOException if obtaining the clutter file filter fails
 */
public static boolean isClutterFile(File file) throws IOException {
	FileFilter filter = getClutterFileFilter();
	return filter.accept(file);
}
/**
 * Tells whether the given name carries an episode identifier.
 *
 * @param name the name to inspect
 * @param strict forwarded to {@link #parseEpisodeNumber(String, boolean)}
 * @return {@code true} if episode number parsing yields a non-null result, or, failing that, if date parsing does
 */
public static boolean isEpisode(String name, boolean strict) {
	// season / episode numbers are checked first; the date is only parsed if that yields nothing
	if (parseEpisodeNumber(name, strict) != null) {
		return true;
	}
	return parseDate(name) != null;
}
/**
 * Matches season / episode numbers in the given string.
 *
 * @param string the text to match against
 * @param strict passed on to the {@code SeasonEpisodeMatcher} constructor
 * @return the result of {@code SeasonEpisodeMatcher.match} for {@code string}
 */
public static List<SxE> parseEpisodeNumber(String string, boolean strict) {
	SeasonEpisodeMatcher matcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
	return matcher.match(string);
}
/**
 * Matches season / episode numbers for the given file.
 *
 * @param file the file to match against
 * @param strict passed on to the {@code SeasonEpisodeMatcher} constructor
 * @return the result of {@code SeasonEpisodeMatcher.match} for {@code file}
 */
public static List<SxE> parseEpisodeNumber(File file, boolean strict) {
	SeasonEpisodeMatcher matcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
	return matcher.match(file);
}
/**
 * Parses a date from the given object using a fresh {@code DateMetric}.
 *
 * @param object the object to parse a date from
 * @return the result of {@code DateMetric.parse} for {@code object}
 */
public static Date parseDate(Object object) {
	DateMetric metric = new DateMetric();
	return metric.parse(object);
}
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
// map series names by folder
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();
@ -236,7 +234,6 @@ public class MediaDetection {
return batchSets;
}
public static Object getEpisodeIdentifier(CharSequence name, boolean strict) {
// check SxE first
Object match = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(name);
@ -248,7 +245,6 @@ public class MediaDetection {
return match;
}
public static List<String> detectSeriesNames(Collection<File> files, Locale locale) throws Exception {
List<String> names = new ArrayList<String>();
@ -361,7 +357,6 @@ public class MediaDetection {
return getUniqueQuerySet(names);
}
public static List<String> matchSeriesByDirectMapping(Collection<File> files) throws Exception {
Map<Pattern, String> seriesDirectMappings = releaseInfo.getSeriesDirectMappings();
List<String> matches = new ArrayList<String>();
@ -379,7 +374,6 @@ public class MediaDetection {
private static List<Entry<String, SearchResult>> seriesIndex = new ArrayList<Entry<String, SearchResult>>(75000);
public static synchronized List<Entry<String, SearchResult>> getSeriesIndex() throws IOException {
if (seriesIndex.isEmpty()) {
try {
@ -402,7 +396,6 @@ public class MediaDetection {
return seriesIndex;
}
public static List<String> matchSeriesByName(Collection<String> names, int maxStartIndex) throws Exception {
HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
List<String> matches = new ArrayList<String>();
@ -433,7 +426,6 @@ public class MediaDetection {
return matches;
}
public static List<SearchResult> matchSeriesFromStringWithoutSpacing(Collection<String> names, boolean strict) throws IOException {
// clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail
Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+");
@ -465,7 +457,6 @@ public class MediaDetection {
return seriesList;
}
public static Collection<Movie> detectMovie(File movieFile, MovieIdentificationService hashLookupService, MovieIdentificationService queryLookupService, Locale locale, boolean strict) throws Exception {
Set<Movie> options = new LinkedHashSet<Movie>();
@ -566,16 +557,19 @@ public class MediaDetection {
// try query without year as it sometimes messes up results if years don't match properly (movie release years vs dvd release year, etc)
if (results.isEmpty() && !strict) {
List<String> termsWithoutYear = new ArrayList<String>();
List<String> lastResortQueryList = new ArrayList<String>();
Pattern yearPattern = Pattern.compile("(?:19|20)\\d{2}");
Pattern akaPattern = Pattern.compile("\\bAKA\\b", Pattern.CASE_INSENSITIVE);
for (String term : terms) {
Matcher m = yearPattern.matcher(term);
if (m.find()) {
termsWithoutYear.add(m.replaceAll("").trim());
if (yearPattern.matcher(term).find() || akaPattern.matcher(term).find()) {
// try to separate AKA titles as well into separate searches
for (String mn : akaPattern.split(yearPattern.matcher(term).replaceAll(""))) {
lastResortQueryList.add(mn.trim());
}
}
}
if (termsWithoutYear.size() > 0) {
results = queryMovieByFileName(termsWithoutYear, queryLookupService, locale);
if (lastResortQueryList.size() > 0) {
results = queryMovieByFileName(lastResortQueryList, queryLookupService, locale);
}
}
@ -589,13 +583,11 @@ public class MediaDetection {
return sortBySimilarity(options, terms);
}
public static SimilarityMetric getMovieMatchMetric() {
return new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric(), new SequenceMatchSimilarity(0, true), new NumericSimilarityMetric() {
private Pattern year = Pattern.compile("\\b\\d{4}\\b");
@Override
protected String normalize(Object object) {
Matcher ym = year.matcher(object.toString());
@ -606,7 +598,6 @@ public class MediaDetection {
return sb.toString().trim();
}
@Override
public float getSimilarity(Object o1, Object o2) {
return super.getSimilarity(o1, o2) * 2; // DOUBLE WEIGHT FOR YEAR MATCH
@ -614,7 +605,6 @@ public class MediaDetection {
});
}
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms) throws IOException {
List<String> paragon = stripReleaseInfo(terms, true);
List<T> sorted = new ArrayList<T>(options);
@ -626,7 +616,6 @@ public class MediaDetection {
return sorted;
}
public static String reduceMovieName(String name, boolean strict) throws IOException {
Matcher matcher = compile(strict ? "^(.+)[\\[\\(]((?:19|20)\\d{2})[\\]\\)]" : "^(.+?)((?:19|20)\\d{2})").matcher(name);
if (matcher.find()) {
@ -635,7 +624,6 @@ public class MediaDetection {
return null;
}
public static Collection<String> reduceMovieNamePermutations(Collection<String> terms) throws IOException {
LinkedList<String> names = new LinkedList<String>();
@ -655,7 +643,6 @@ public class MediaDetection {
return names;
}
public static File guessMovieFolder(File movieFile) throws Exception {
File folder = guessMovieFolderWithoutSanity(movieFile);
@ -667,7 +654,6 @@ public class MediaDetection {
return folder;
}
private static File guessMovieFolderWithoutSanity(File movieFile) throws Exception {
// special case for folder mode
if (movieFile.isDirectory()) {
@ -712,7 +698,6 @@ public class MediaDetection {
return null;
}
public static Movie checkMovie(File file, boolean strict) throws Exception {
List<Movie> matches = file != null ? matchMovieName(singleton(file.getName()), strict, 4) : null;
return matches != null && matches.size() > 0 ? matches.get(0) : null;
@ -720,7 +705,6 @@ public class MediaDetection {
private static List<Entry<String, Movie>> movieIndex = new ArrayList<Entry<String, Movie>>(100000);
public static synchronized List<Entry<String, Movie>> getMovieIndex() throws IOException {
if (movieIndex.isEmpty()) {
try {
@ -737,7 +721,6 @@ public class MediaDetection {
return movieIndex;
}
public static List<Movie> matchMovieName(final Collection<String> files, boolean strict, int maxStartIndex) throws Exception {
// cross-reference file / folder name with movie list
final HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
@ -774,7 +757,6 @@ public class MediaDetection {
return results;
}
public static List<Movie> matchMovieFromStringWithoutSpacing(Collection<String> names, boolean strict) throws IOException {
// clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail
Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+");
@ -810,7 +792,6 @@ public class MediaDetection {
return new ArrayList<Movie>(movies);
}
private static Collection<Movie> queryMovieByFileName(Collection<String> files, MovieIdentificationService queryLookupService, Locale locale) throws Exception {
// remove blacklisted terms
List<String> querySet = new ArrayList<String>();
@ -844,16 +825,16 @@ public class MediaDetection {
return results;
}
/**
 * Removes duplicate and empty terms from the given query terms.
 * <p>
 * Terms are considered equal if they match after punctuation normalization and lower-casing. When several terms
 * share the same normalized key, the last one seen is kept, at the position of the first.
 *
 * @param terms the candidate query terms
 * @return the distinct non-empty terms
 */
private static List<String> getUniqueQuerySet(Collection<String> terms) {
	Map<String, String> unique = new LinkedHashMap<String, String>();
	for (String it : terms) {
		// empty terms must never be added, so the put happens only inside this guard
		if (it.length() > 0) {
			unique.put(normalizePunctuation(it).toLowerCase(), it);
		}
	}
	return new ArrayList<String>(unique.values());
}
public static String stripReleaseInfo(String name) {
try {
return releaseInfo.cleanRelease(singleton(name), true).iterator().next();
@ -864,12 +845,10 @@ public class MediaDetection {
}
}
/**
 * Cleans the given names by delegating to {@code releaseInfo.cleanRelease}.
 *
 * @param names the names to clean
 * @param strict passed through to {@code cleanRelease}
 * @return the list returned by {@code cleanRelease}
 * @throws IOException if {@code cleanRelease} fails with an I/O error
 */
public static List<String> stripReleaseInfo(Collection<String> names, boolean strict) throws IOException {
	List<String> cleaned = releaseInfo.cleanRelease(names, strict);
	return cleaned;
}
public static List<String> stripBlacklistedTerms(Collection<String> names) throws IOException {
Pattern blacklist = releaseInfo.getBlacklistPattern();
List<String> acceptables = new ArrayList<String>(names.size());
@ -881,7 +860,6 @@ public class MediaDetection {
return acceptables;
}
public static Set<Integer> grepImdbIdFor(File file) throws Exception {
Set<Integer> collection = new LinkedHashSet<Integer>();
List<File> nfoFiles = new ArrayList<File>();
@ -904,7 +882,6 @@ public class MediaDetection {
return collection;
}
public static Set<SearchResult> lookupSeriesNameByInfoFile(Collection<File> files, Locale language) throws Exception {
Set<SearchResult> names = new LinkedHashSet<SearchResult>();
@ -943,7 +920,6 @@ public class MediaDetection {
return names;
}
public static Set<Integer> grepImdbId(CharSequence text) {
// scan for imdb id patterns like tt1234567
Matcher imdbMatch = Pattern.compile("(?<=tt)\\d{7}").matcher(text);
@ -956,7 +932,6 @@ public class MediaDetection {
return collection;
}
public static Set<Integer> grepTheTvdbId(CharSequence text) {
// scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14
Set<Integer> collection = new LinkedHashSet<Integer>();
@ -977,17 +952,14 @@ public class MediaDetection {
return collection;
}
/**
 * Resolves a movie from the first IMDb id found in the given nfo file.
 * <p>
 * The file content is decoded as UTF-8 and scanned via {@link #grepImdbId(CharSequence)}. If no id is found,
 * fetching the first element from the empty result throws {@code NoSuchElementException}.
 *
 * @param nfo the file to read
 * @param resolver the service asked for the movie descriptor
 * @param locale passed through to the resolver
 * @return the movie descriptor returned by {@code resolver} for the first id found
 * @throws Exception if reading the file or the lookup fails
 */
public static Movie grepMovie(File nfo, MovieIdentificationService resolver, Locale locale) throws Exception {
	String text = new String(readFile(nfo), "UTF-8");
	Integer imdbId = grepImdbId(text).iterator().next();
	return resolver.getMovieDescriptor(imdbId, locale);
}
/**
 * Resolves series info from the first TheTVDB id found in the given nfo file.
 * <p>
 * The file content is decoded as UTF-8 and scanned via {@link #grepTheTvdbId(CharSequence)}. If no id is found,
 * fetching the first element from the empty result throws {@code NoSuchElementException}.
 *
 * @param nfo the file to read
 * @param locale passed through to the TheTVDB lookup
 * @return the series info returned by {@code WebServices.TheTVDB.getSeriesInfoByID} for the first id found
 * @throws Exception if reading the file or the lookup fails
 */
public static SeriesInfo grepSeries(File nfo, Locale locale) throws Exception {
	String text = new String(readFile(nfo), "UTF-8");
	Integer seriesId = grepTheTvdbId(text).iterator().next();
	return WebServices.TheTVDB.getSeriesInfoByID(seriesId, locale);
}
/*
* Heavy-duty name matcher used for matching a file to one or more movies (out of a list of ~50k)
*/
@ -997,12 +969,10 @@ public class MediaDetection {
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new HashMap<String, CollationKey[]>(65536));
/**
 * Creates a matcher by passing {@code collator}, the given start index limit and {@code true} to the superclass
 * constructor.
 *
 * @param maxStartIndex forwarded unchanged to the superclass constructor
 */
public HighPerformanceMatcher(int maxStartIndex) {
// NOTE(review): the meaning of the third argument (true) is defined by the superclass, not visible here
super(collator, maxStartIndex, true);
}
@Override
protected CollationKey[] split(String sequence) {
CollationKey[] value = transformCache.get(sequence);
@ -1013,13 +983,11 @@ public class MediaDetection {
return value;
}
/**
 * Normalizes the given sequence for matching.
 *
 * @param sequence the text to normalize
 * @return the sequence with punctuation normalized
 */
public String normalize(String sequence) {
	// only normalize punctuation, make sure we keep the year (important for movie matching)
	String normalized = normalizePunctuation(sequence);
	return normalized;
}
}
public static void storeMetaInfo(File file, Object model) {
// only for Episode / Movie objects
if ((model instanceof Episode || model instanceof Movie) && file.exists()) {

View File

@ -9,6 +9,7 @@
[1-9].?of.?[1-9]
\bThe$
\d{2,4}.\b\d{2}.\b\d{2}.\b\d{2}.\b\d{2}
\d{3,4}[pi]
^(TV.)?(Show|Serie)[s]?
^[0-9]{1,2}[.]
^[A-Z0-9]$

View File

@ -1368,6 +1368,7 @@ NEPTUNE
NERDHD
NeRoZ
NES
NEUTRINO
NEW.SOURCE
NewArtRiot
NewSubs