* Better support for Name AKA Name YEAR naming scheme
@see http://www.filebot.net/forums/viewtopic.php?f=6&t=882
This commit is contained in:
parent
e4fdcb1884
commit
ee3c1d25d7
|
@ -1,14 +1,22 @@
|
|||
|
||||
package net.sourceforge.filebot.media;
|
||||
|
||||
|
||||
import static java.util.Collections.*;
|
||||
import static java.util.regex.Pattern.*;
|
||||
import static net.sourceforge.filebot.MediaTypes.*;
|
||||
import static net.sourceforge.filebot.Settings.*;
|
||||
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
|
||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||
import static net.sourceforge.tuned.FileUtilities.*;
|
||||
import static java.util.Collections.addAll;
|
||||
import static java.util.Collections.emptyList;
|
||||
import static java.util.Collections.reverseOrder;
|
||||
import static java.util.Collections.singleton;
|
||||
import static java.util.Collections.sort;
|
||||
import static java.util.Collections.synchronizedMap;
|
||||
import static java.util.regex.Pattern.compile;
|
||||
import static net.sourceforge.filebot.MediaTypes.NFO_FILES;
|
||||
import static net.sourceforge.filebot.Settings.useExtendedFileAttributes;
|
||||
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.getLenientCollator;
|
||||
import static net.sourceforge.filebot.similarity.Normalization.normalizeBrackets;
|
||||
import static net.sourceforge.filebot.similarity.Normalization.normalizePunctuation;
|
||||
import static net.sourceforge.tuned.FileUtilities.filter;
|
||||
import static net.sourceforge.tuned.FileUtilities.getName;
|
||||
import static net.sourceforge.tuned.FileUtilities.listFiles;
|
||||
import static net.sourceforge.tuned.FileUtilities.mapByFolder;
|
||||
import static net.sourceforge.tuned.FileUtilities.readFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
|
@ -62,7 +70,6 @@ import net.sourceforge.filebot.web.SearchResult;
|
|||
import net.sourceforge.filebot.web.TheTVDBClient.SeriesInfo;
|
||||
import net.sourceforge.filebot.web.TheTVDBSearchResult;
|
||||
|
||||
|
||||
public class MediaDetection {
|
||||
|
||||
// Shared ReleaseInfo instance; the helpers in this class obtain their file filters, patterns and release-name cleaning from it.
public static final ReleaseInfo releaseInfo = new ReleaseInfo();
|
||||
|
@ -70,7 +77,6 @@ public class MediaDetection {
|
|||
// Cached disk folder filter; assigned from releaseInfo by getDiskFolderFilter() while still null.
private static FileFilter diskFolder;
|
||||
// Cached clutter file filter; assigned from releaseInfo by getClutterFileFilter() while still null.
private static FileFilter clutterFile;
|
||||
|
||||
|
||||
public static FileFilter getDiskFolderFilter() {
|
||||
if (diskFolder == null) {
|
||||
diskFolder = releaseInfo.getDiskFolderFilter();
|
||||
|
@ -78,7 +84,6 @@ public class MediaDetection {
|
|||
return diskFolder;
|
||||
}
|
||||
|
||||
|
||||
public static FileFilter getClutterFileFilter() throws IOException {
|
||||
if (clutterFile == null) {
|
||||
clutterFile = releaseInfo.getClutterFileFilter();
|
||||
|
@ -86,37 +91,30 @@ public class MediaDetection {
|
|||
return clutterFile;
|
||||
}
|
||||
|
||||
|
||||
public static boolean isDiskFolder(File folder) {
|
||||
return getDiskFolderFilter().accept(folder);
|
||||
}
|
||||
|
||||
|
||||
public static boolean isClutterFile(File file) throws IOException {
|
||||
return getClutterFileFilter().accept(file);
|
||||
}
|
||||
|
||||
|
||||
public static boolean isEpisode(String name, boolean strict) {
|
||||
return parseEpisodeNumber(name, strict) != null || parseDate(name) != null;
|
||||
}
|
||||
|
||||
|
||||
public static List<SxE> parseEpisodeNumber(String string, boolean strict) {
|
||||
return new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(string);
|
||||
}
|
||||
|
||||
|
||||
public static List<SxE> parseEpisodeNumber(File file, boolean strict) {
|
||||
return new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(file);
|
||||
}
|
||||
|
||||
|
||||
public static Date parseDate(Object object) {
|
||||
return new DateMetric().parse(object);
|
||||
}
|
||||
|
||||
|
||||
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files, Locale locale) throws Exception {
|
||||
// map series names by folder
|
||||
Map<File, Set<String>> seriesNamesByFolder = new HashMap<File, Set<String>>();
|
||||
|
@ -236,7 +234,6 @@ public class MediaDetection {
|
|||
return batchSets;
|
||||
}
|
||||
|
||||
|
||||
public static Object getEpisodeIdentifier(CharSequence name, boolean strict) {
|
||||
// check SxE first
|
||||
Object match = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict).match(name);
|
||||
|
@ -248,7 +245,6 @@ public class MediaDetection {
|
|||
return match;
|
||||
}
|
||||
|
||||
|
||||
public static List<String> detectSeriesNames(Collection<File> files, Locale locale) throws Exception {
|
||||
List<String> names = new ArrayList<String>();
|
||||
|
||||
|
@ -361,7 +357,6 @@ public class MediaDetection {
|
|||
return getUniqueQuerySet(names);
|
||||
}
|
||||
|
||||
|
||||
public static List<String> matchSeriesByDirectMapping(Collection<File> files) throws Exception {
|
||||
Map<Pattern, String> seriesDirectMappings = releaseInfo.getSeriesDirectMappings();
|
||||
List<String> matches = new ArrayList<String>();
|
||||
|
@ -379,7 +374,6 @@ public class MediaDetection {
|
|||
|
||||
// Series index entries, pre-sized for 75000 elements; getSeriesIndex() checks this list for emptiness and returns it (presumably filling it on first use -- confirm against the full method).
private static List<Entry<String, SearchResult>> seriesIndex = new ArrayList<Entry<String, SearchResult>>(75000);
|
||||
|
||||
|
||||
public static synchronized List<Entry<String, SearchResult>> getSeriesIndex() throws IOException {
|
||||
if (seriesIndex.isEmpty()) {
|
||||
try {
|
||||
|
@ -402,7 +396,6 @@ public class MediaDetection {
|
|||
return seriesIndex;
|
||||
}
|
||||
|
||||
|
||||
public static List<String> matchSeriesByName(Collection<String> names, int maxStartIndex) throws Exception {
|
||||
HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
|
||||
List<String> matches = new ArrayList<String>();
|
||||
|
@ -433,7 +426,6 @@ public class MediaDetection {
|
|||
return matches;
|
||||
}
|
||||
|
||||
|
||||
public static List<SearchResult> matchSeriesFromStringWithoutSpacing(Collection<String> names, boolean strict) throws IOException {
|
||||
// clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail
|
||||
Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+");
|
||||
|
@ -465,7 +457,6 @@ public class MediaDetection {
|
|||
return seriesList;
|
||||
}
|
||||
|
||||
|
||||
public static Collection<Movie> detectMovie(File movieFile, MovieIdentificationService hashLookupService, MovieIdentificationService queryLookupService, Locale locale, boolean strict) throws Exception {
|
||||
Set<Movie> options = new LinkedHashSet<Movie>();
|
||||
|
||||
|
@ -566,16 +557,19 @@ public class MediaDetection {
|
|||
|
||||
// try query without year as it sometimes messes up results if years don't match properly (movie release years vs dvd release year, etc)
|
||||
if (results.isEmpty() && !strict) {
|
||||
List<String> termsWithoutYear = new ArrayList<String>();
|
||||
List<String> lastResortQueryList = new ArrayList<String>();
|
||||
Pattern yearPattern = Pattern.compile("(?:19|20)\\d{2}");
|
||||
Pattern akaPattern = Pattern.compile("\\bAKA\\b", Pattern.CASE_INSENSITIVE);
|
||||
for (String term : terms) {
|
||||
Matcher m = yearPattern.matcher(term);
|
||||
if (m.find()) {
|
||||
termsWithoutYear.add(m.replaceAll("").trim());
|
||||
if (yearPattern.matcher(term).find() || akaPattern.matcher(term).find()) {
|
||||
// try to separate AKA titles as well into separate searches
|
||||
for (String mn : akaPattern.split(yearPattern.matcher(term).replaceAll(""))) {
|
||||
lastResortQueryList.add(mn.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
if (termsWithoutYear.size() > 0) {
|
||||
results = queryMovieByFileName(termsWithoutYear, queryLookupService, locale);
|
||||
if (lastResortQueryList.size() > 0) {
|
||||
results = queryMovieByFileName(lastResortQueryList, queryLookupService, locale);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -589,13 +583,11 @@ public class MediaDetection {
|
|||
return sortBySimilarity(options, terms);
|
||||
}
|
||||
|
||||
|
||||
public static SimilarityMetric getMovieMatchMetric() {
|
||||
return new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric(), new SequenceMatchSimilarity(0, true), new NumericSimilarityMetric() {
|
||||
|
||||
private Pattern year = Pattern.compile("\\b\\d{4}\\b");
|
||||
|
||||
|
||||
@Override
|
||||
protected String normalize(Object object) {
|
||||
Matcher ym = year.matcher(object.toString());
|
||||
|
@ -606,7 +598,6 @@ public class MediaDetection {
|
|||
return sb.toString().trim();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public float getSimilarity(Object o1, Object o2) {
|
||||
return super.getSimilarity(o1, o2) * 2; // DOUBLE WEIGHT FOR YEAR MATCH
|
||||
|
@ -614,7 +605,6 @@ public class MediaDetection {
|
|||
});
|
||||
}
|
||||
|
||||
|
||||
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms) throws IOException {
|
||||
List<String> paragon = stripReleaseInfo(terms, true);
|
||||
List<T> sorted = new ArrayList<T>(options);
|
||||
|
@ -626,7 +616,6 @@ public class MediaDetection {
|
|||
return sorted;
|
||||
}
|
||||
|
||||
|
||||
public static String reduceMovieName(String name, boolean strict) throws IOException {
|
||||
Matcher matcher = compile(strict ? "^(.+)[\\[\\(]((?:19|20)\\d{2})[\\]\\)]" : "^(.+?)((?:19|20)\\d{2})").matcher(name);
|
||||
if (matcher.find()) {
|
||||
|
@ -635,7 +624,6 @@ public class MediaDetection {
|
|||
return null;
|
||||
}
|
||||
|
||||
|
||||
public static Collection<String> reduceMovieNamePermutations(Collection<String> terms) throws IOException {
|
||||
LinkedList<String> names = new LinkedList<String>();
|
||||
|
||||
|
@ -655,7 +643,6 @@ public class MediaDetection {
|
|||
return names;
|
||||
}
|
||||
|
||||
|
||||
public static File guessMovieFolder(File movieFile) throws Exception {
|
||||
File folder = guessMovieFolderWithoutSanity(movieFile);
|
||||
|
||||
|
@ -667,7 +654,6 @@ public class MediaDetection {
|
|||
return folder;
|
||||
}
|
||||
|
||||
|
||||
private static File guessMovieFolderWithoutSanity(File movieFile) throws Exception {
|
||||
// special case for folder mode
|
||||
if (movieFile.isDirectory()) {
|
||||
|
@ -712,7 +698,6 @@ public class MediaDetection {
|
|||
return null;
|
||||
}
|
||||
|
||||
|
||||
public static Movie checkMovie(File file, boolean strict) throws Exception {
|
||||
List<Movie> matches = file != null ? matchMovieName(singleton(file.getName()), strict, 4) : null;
|
||||
return matches != null && matches.size() > 0 ? matches.get(0) : null;
|
||||
|
@ -720,7 +705,6 @@ public class MediaDetection {
|
|||
|
||||
// Movie index entries, pre-sized for 100000 elements; getMovieIndex() checks this list for emptiness and returns it (presumably filling it on first use -- confirm against the full method).
private static List<Entry<String, Movie>> movieIndex = new ArrayList<Entry<String, Movie>>(100000);
|
||||
|
||||
|
||||
public static synchronized List<Entry<String, Movie>> getMovieIndex() throws IOException {
|
||||
if (movieIndex.isEmpty()) {
|
||||
try {
|
||||
|
@ -737,7 +721,6 @@ public class MediaDetection {
|
|||
return movieIndex;
|
||||
}
|
||||
|
||||
|
||||
public static List<Movie> matchMovieName(final Collection<String> files, boolean strict, int maxStartIndex) throws Exception {
|
||||
// cross-reference file / folder name with movie list
|
||||
final HighPerformanceMatcher nameMatcher = new HighPerformanceMatcher(maxStartIndex);
|
||||
|
@ -774,7 +757,6 @@ public class MediaDetection {
|
|||
return results;
|
||||
}
|
||||
|
||||
|
||||
public static List<Movie> matchMovieFromStringWithoutSpacing(Collection<String> names, boolean strict) throws IOException {
|
||||
// clear name of punctuation, spacing, and leading 'The' or 'A' that are common causes for word-lookup to fail
|
||||
Pattern spacing = Pattern.compile("(^(?i)(The|A)\\b)|[\\p{Punct}\\p{Space}]+");
|
||||
|
@ -810,7 +792,6 @@ public class MediaDetection {
|
|||
return new ArrayList<Movie>(movies);
|
||||
}
|
||||
|
||||
|
||||
private static Collection<Movie> queryMovieByFileName(Collection<String> files, MovieIdentificationService queryLookupService, Locale locale) throws Exception {
|
||||
// remove blacklisted terms
|
||||
List<String> querySet = new ArrayList<String>();
|
||||
|
@ -844,16 +825,16 @@ public class MediaDetection {
|
|||
return results;
|
||||
}
|
||||
|
||||
|
||||
private static List<String> getUniqueQuerySet(Collection<String> terms) {
|
||||
Map<String, String> unique = new LinkedHashMap<String, String>();
|
||||
for (String it : terms) {
|
||||
unique.put(normalizePunctuation(it).toLowerCase(), it);
|
||||
if (it.length() > 0) {
|
||||
unique.put(normalizePunctuation(it).toLowerCase(), it);
|
||||
}
|
||||
}
|
||||
return new ArrayList<String>(unique.values());
|
||||
}
|
||||
|
||||
|
||||
public static String stripReleaseInfo(String name) {
|
||||
try {
|
||||
return releaseInfo.cleanRelease(singleton(name), true).iterator().next();
|
||||
|
@ -864,12 +845,10 @@ public class MediaDetection {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
public static List<String> stripReleaseInfo(Collection<String> names, boolean strict) throws IOException {
|
||||
return releaseInfo.cleanRelease(names, strict);
|
||||
}
|
||||
|
||||
|
||||
public static List<String> stripBlacklistedTerms(Collection<String> names) throws IOException {
|
||||
Pattern blacklist = releaseInfo.getBlacklistPattern();
|
||||
List<String> acceptables = new ArrayList<String>(names.size());
|
||||
|
@ -881,7 +860,6 @@ public class MediaDetection {
|
|||
return acceptables;
|
||||
}
|
||||
|
||||
|
||||
public static Set<Integer> grepImdbIdFor(File file) throws Exception {
|
||||
Set<Integer> collection = new LinkedHashSet<Integer>();
|
||||
List<File> nfoFiles = new ArrayList<File>();
|
||||
|
@ -904,7 +882,6 @@ public class MediaDetection {
|
|||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public static Set<SearchResult> lookupSeriesNameByInfoFile(Collection<File> files, Locale language) throws Exception {
|
||||
Set<SearchResult> names = new LinkedHashSet<SearchResult>();
|
||||
|
||||
|
@ -943,7 +920,6 @@ public class MediaDetection {
|
|||
return names;
|
||||
}
|
||||
|
||||
|
||||
public static Set<Integer> grepImdbId(CharSequence text) {
|
||||
// scan for imdb id patterns like tt1234567
|
||||
Matcher imdbMatch = Pattern.compile("(?<=tt)\\d{7}").matcher(text);
|
||||
|
@ -956,7 +932,6 @@ public class MediaDetection {
|
|||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public static Set<Integer> grepTheTvdbId(CharSequence text) {
|
||||
// scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14
|
||||
Set<Integer> collection = new LinkedHashSet<Integer>();
|
||||
|
@ -977,17 +952,14 @@ public class MediaDetection {
|
|||
return collection;
|
||||
}
|
||||
|
||||
|
||||
public static Movie grepMovie(File nfo, MovieIdentificationService resolver, Locale locale) throws Exception {
|
||||
return resolver.getMovieDescriptor(grepImdbId(new String(readFile(nfo), "UTF-8")).iterator().next(), locale);
|
||||
}
|
||||
|
||||
|
||||
public static SeriesInfo grepSeries(File nfo, Locale locale) throws Exception {
|
||||
return WebServices.TheTVDB.getSeriesInfoByID(grepTheTvdbId(new String(readFile(nfo), "UTF-8")).iterator().next(), locale);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Heavy-duty name matcher used for matching a file to one or more movies (out of a list of ~50k)
|
||||
*/
|
||||
|
@ -997,12 +969,10 @@ public class MediaDetection {
|
|||
|
||||
// Shared cache of CollationKey arrays keyed by input sequence; split(String) looks sequences up here first.
// Wrapped via synchronizedMap; the backing HashMap is created with an initial capacity of 65536.
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new HashMap<String, CollationKey[]>(65536));
|
||||
|
||||
|
||||
/**
 * Creates a matcher by passing {@code collator}, the given start index limit
 * and the flag {@code true} to the superclass constructor.
 * NOTE(review): the meaning of the third argument is defined by the superclass,
 * which is not visible here -- confirm before relying on it.
 *
 * @param maxStartIndex forwarded unchanged to the superclass constructor
 */
public HighPerformanceMatcher(int maxStartIndex) {
|
||||
super(collator, maxStartIndex, true);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected CollationKey[] split(String sequence) {
|
||||
CollationKey[] value = transformCache.get(sequence);
|
||||
|
@ -1013,13 +983,11 @@ public class MediaDetection {
|
|||
return value;
|
||||
}
|
||||
|
||||
|
||||
public String normalize(String sequence) {
|
||||
return normalizePunctuation(sequence); // only normalize punctuation, make sure we keep the year (important for movie matching)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static void storeMetaInfo(File file, Object model) {
|
||||
// only for Episode / Movie objects
|
||||
if ((model instanceof Episode || model instanceof Movie) && file.exists()) {
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
[1-9].?of.?[1-9]
|
||||
\bThe$
|
||||
\d{2,4}.\b\d{2}.\b\d{2}.\b\d{2}.\b\d{2}
|
||||
\d{3,4}[pi]
|
||||
^(TV.)?(Show|Serie)[s]?
|
||||
^[0-9]{1,2}[.]
|
||||
^[A-Z0-9]$
|
||||
|
|
|
@ -1368,6 +1368,7 @@ NEPTUNE
|
|||
NERDHD
|
||||
NeRoZ
|
||||
NES
|
||||
NEUTRINO
|
||||
NEW.SOURCE
|
||||
NewArtRiot
|
||||
NewSubs
|
||||
|
|
Loading…
Reference in New Issue