* major performance improvements for mass-movie matching by optimizing data caching
* caching of requests didn't work in parallel searches because threads would always hit similar files (in order) at the same time and so couldn't benefit from caching. The new logic uses one thread per folder, since all files that result in the same query are usually in the same folder
This commit is contained in:
parent
d33a907f2f
commit
d143e3feb5
|
@ -31,7 +31,6 @@ import java.util.Map.Entry;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.WeakHashMap;
|
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import java.util.logging.Logger;
|
import java.util.logging.Logger;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
|
@ -427,7 +426,6 @@ public class MediaDetection {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static List<Entry<String, Movie>> movieIndex;
|
private static List<Entry<String, Movie>> movieIndex;
|
||||||
|
|
||||||
|
|
||||||
|
@ -531,7 +529,7 @@ public class MediaDetection {
|
||||||
final SimilarityMetric metric = new NameSimilarityMetric();
|
final SimilarityMetric metric = new NameSimilarityMetric();
|
||||||
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
|
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
|
||||||
for (String query : querySet) {
|
for (String query : querySet) {
|
||||||
for (Movie movie : queryLookupService.searchMovie(query, locale)) {
|
for (Movie movie : queryLookupService.searchMovie(query.toLowerCase(), locale)) {
|
||||||
probabilityMap.put(movie, metric.getSimilarity(query, movie));
|
probabilityMap.put(movie, metric.getSimilarity(query, movie));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -660,7 +658,7 @@ public class MediaDetection {
|
||||||
|
|
||||||
private static final Collator collator = getLenientCollator(Locale.ENGLISH);
|
private static final Collator collator = getLenientCollator(Locale.ENGLISH);
|
||||||
|
|
||||||
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new WeakHashMap<String, CollationKey[]>(65536));
|
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new HashMap<String, CollationKey[]>(65536));
|
||||||
|
|
||||||
|
|
||||||
public HighPerformanceMatcher(int maxStartIndex) {
|
public HighPerformanceMatcher(int maxStartIndex) {
|
||||||
|
|
|
@ -29,7 +29,6 @@ import java.util.Map;
|
||||||
import java.util.Scanner;
|
import java.util.Scanner;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.WeakHashMap;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
|
@ -102,21 +101,46 @@ public class ReleaseInfo {
|
||||||
return lastMatch;
|
return lastMatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cached patterns
|
||||||
|
private Pattern[] strict_stopwords;
|
||||||
|
private Pattern[] strict_blacklist;
|
||||||
|
private Pattern[] nonstrict_stopwords;
|
||||||
|
private Pattern[] nonstrict_blacklist;
|
||||||
|
|
||||||
|
|
||||||
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
|
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
|
||||||
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
|
Pattern[] stopwords;
|
||||||
|
Pattern[] blacklist;
|
||||||
|
|
||||||
Pattern clutterBracket = getClutterBracketPattern(strict);
|
// initialize cached patterns
|
||||||
Pattern releaseGroup = getReleaseGroupPattern(strict);
|
synchronized (this) {
|
||||||
Pattern languageSuffix = getLanguageSuffixPattern(languages);
|
stopwords = strict ? strict_stopwords : nonstrict_stopwords;
|
||||||
Pattern languageTag = getLanguageTagPattern(languages);
|
blacklist = strict ? strict_blacklist : nonstrict_blacklist;
|
||||||
Pattern videoSource = getVideoSourcePattern();
|
|
||||||
Pattern videoFormat = getVideoFormatPattern();
|
|
||||||
Pattern resolution = getResolutionPattern();
|
|
||||||
Pattern queryBlacklist = getBlacklistPattern();
|
|
||||||
|
|
||||||
Pattern[] stopwords = new Pattern[] { languageTag, videoSource, videoFormat, resolution, languageSuffix };
|
if (stopwords == null || blacklist == null) {
|
||||||
Pattern[] blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
|
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
|
||||||
|
Pattern clutterBracket = getClutterBracketPattern(strict);
|
||||||
|
Pattern releaseGroup = getReleaseGroupPattern(strict);
|
||||||
|
Pattern languageSuffix = getLanguageSuffixPattern(languages);
|
||||||
|
Pattern languageTag = getLanguageTagPattern(languages);
|
||||||
|
Pattern videoSource = getVideoSourcePattern();
|
||||||
|
Pattern videoFormat = getVideoFormatPattern();
|
||||||
|
Pattern resolution = getResolutionPattern();
|
||||||
|
Pattern queryBlacklist = getBlacklistPattern();
|
||||||
|
|
||||||
|
stopwords = new Pattern[] { languageTag, videoSource, videoFormat, resolution, languageSuffix };
|
||||||
|
blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
|
||||||
|
|
||||||
|
// cache compiled patterns for common usage
|
||||||
|
if (strict) {
|
||||||
|
strict_stopwords = stopwords;
|
||||||
|
strict_blacklist = blacklist;
|
||||||
|
} else {
|
||||||
|
nonstrict_stopwords = stopwords;
|
||||||
|
nonstrict_blacklist = blacklist;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
List<String> output = new ArrayList<String>(items.size());
|
List<String> output = new ArrayList<String>(items.size());
|
||||||
for (String it : items) {
|
for (String it : items) {
|
||||||
|
@ -334,17 +358,8 @@ public class ReleaseInfo {
|
||||||
return patterns;
|
return patterns;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Map<Set<Locale>, Map<String, Locale>> languageMapCache = synchronizedMap(new WeakHashMap<Set<Locale>, Map<String, Locale>>(2));
|
|
||||||
|
|
||||||
|
|
||||||
private Map<String, Locale> getLanguageMap(Locale... supportedDisplayLocale) {
|
private Map<String, Locale> getLanguageMap(Locale... supportedDisplayLocale) {
|
||||||
// try cache
|
|
||||||
Set<Locale> displayLocales = new HashSet<Locale>(asList(supportedDisplayLocale));
|
|
||||||
Map<String, Locale> languageMap = languageMapCache.get(displayLocales);
|
|
||||||
if (languageMap != null) {
|
|
||||||
return languageMap;
|
|
||||||
}
|
|
||||||
|
|
||||||
// use maximum strength collator by default
|
// use maximum strength collator by default
|
||||||
Collator collator = Collator.getInstance(Locale.ROOT);
|
Collator collator = Collator.getInstance(Locale.ROOT);
|
||||||
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
|
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
|
||||||
|
@ -352,7 +367,7 @@ public class ReleaseInfo {
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
Comparator<String> order = (Comparator) collator;
|
Comparator<String> order = (Comparator) collator;
|
||||||
languageMap = new TreeMap<String, Locale>(order);
|
Map<String, Locale> languageMap = languageMap = new TreeMap<String, Locale>(order);
|
||||||
|
|
||||||
for (String code : Locale.getISOLanguages()) {
|
for (String code : Locale.getISOLanguages()) {
|
||||||
Locale locale = new Locale(code);
|
Locale locale = new Locale(code);
|
||||||
|
@ -360,7 +375,7 @@ public class ReleaseInfo {
|
||||||
languageMap.put(locale.getISO3Language(), locale);
|
languageMap.put(locale.getISO3Language(), locale);
|
||||||
|
|
||||||
// map display language names for given locales
|
// map display language names for given locales
|
||||||
for (Locale language : displayLocales) {
|
for (Locale language : new HashSet<Locale>(asList(supportedDisplayLocale))) {
|
||||||
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
|
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
|
||||||
String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD);
|
String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD);
|
||||||
languageMap.put(languageName, locale);
|
languageMap.put(languageName, locale);
|
||||||
|
@ -373,7 +388,6 @@ public class ReleaseInfo {
|
||||||
languageMap.remove("III");
|
languageMap.remove("III");
|
||||||
|
|
||||||
Map<String, Locale> result = unmodifiableMap(languageMap);
|
Map<String, Locale> result = unmodifiableMap(languageMap);
|
||||||
languageMapCache.put(displayLocales, result);
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,9 +7,9 @@ import static java.util.Collections.*;
|
||||||
|
|
||||||
import java.text.CollationKey;
|
import java.text.CollationKey;
|
||||||
import java.text.Collator;
|
import java.text.Collator;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.WeakHashMap;
|
|
||||||
|
|
||||||
|
|
||||||
public class CommonSequenceMatcher {
|
public class CommonSequenceMatcher {
|
||||||
|
@ -22,7 +22,6 @@ public class CommonSequenceMatcher {
|
||||||
return collator;
|
return collator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected final Collator collator;
|
protected final Collator collator;
|
||||||
protected final int commonSequenceMaxStartIndex;
|
protected final int commonSequenceMaxStartIndex;
|
||||||
|
|
||||||
|
@ -81,8 +80,7 @@ public class CommonSequenceMatcher {
|
||||||
return getCollationKeys(sequence.split("\\s+"));
|
return getCollationKeys(sequence.split("\\s+"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new HashMap<String, CollationKey>(256));
|
||||||
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new WeakHashMap<String, CollationKey>(256));
|
|
||||||
|
|
||||||
|
|
||||||
protected CollationKey[] getCollationKeys(String[] words) {
|
protected CollationKey[] getCollationKeys(String[] words) {
|
||||||
|
|
|
@ -7,13 +7,13 @@ import static java.util.Collections.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.IdentityHashMap;
|
import java.util.IdentityHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.WeakHashMap;
|
|
||||||
|
|
||||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||||
import net.sourceforge.filebot.web.Episode;
|
import net.sourceforge.filebot.web.Episode;
|
||||||
|
@ -77,9 +77,8 @@ public class EpisodeMatcher extends Matcher<File, Object> {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
|
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
|
||||||
private final Map<File, Set<SxE>> transformCache = synchronizedMap(new WeakHashMap<File, Set<SxE>>(64, 4));
|
private final Map<File, Set<SxE>> transformCache = synchronizedMap(new HashMap<File, Set<SxE>>(64, 4));
|
||||||
|
|
||||||
|
|
||||||
private Set<SxE> parseEpisodeIdentifer(File file) {
|
private Set<SxE> parseEpisodeIdentifer(File file) {
|
||||||
|
|
|
@ -10,8 +10,8 @@ import static net.sourceforge.tuned.FileUtilities.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.WeakHashMap;
|
|
||||||
|
|
||||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||||
import net.sourceforge.filebot.vfs.FileInfo;
|
import net.sourceforge.filebot.vfs.FileInfo;
|
||||||
|
@ -26,7 +26,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
// Match by season / episode numbers
|
// Match by season / episode numbers
|
||||||
SeasonEpisode(new SeasonEpisodeMetric() {
|
SeasonEpisode(new SeasonEpisodeMetric() {
|
||||||
|
|
||||||
private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new WeakHashMap<Object, Collection<SxE>>(64, 4));
|
private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new HashMap<Object, Collection<SxE>>(64, 4));
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -64,7 +64,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
// Match episode airdate
|
// Match episode airdate
|
||||||
AirDate(new DateMetric() {
|
AirDate(new DateMetric() {
|
||||||
|
|
||||||
private final Map<Object, Date> transformCache = synchronizedMap(new WeakHashMap<Object, Date>(64, 4));
|
private final Map<Object, Date> transformCache = synchronizedMap(new HashMap<Object, Date>(64, 4));
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -118,8 +118,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
EpisodeIdentifier(new MetricCascade(SeasonEpisode, AirDate)),
|
EpisodeIdentifier(new MetricCascade(SeasonEpisode, AirDate)),
|
||||||
|
|
||||||
// Advanced episode <-> file matching
|
// Advanced episode <-> file matching
|
||||||
EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)),
|
EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)), EpisodeBalancer(new SimilarityMetric() {
|
||||||
EpisodeBalancer(new SimilarityMetric() {
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
|
@ -301,7 +300,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
return metric.getSimilarity(o1, o2);
|
return metric.getSimilarity(o1, o2);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Map<Object, String> transformCache = synchronizedMap(new WeakHashMap<Object, String>(64, 4));
|
private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4));
|
||||||
|
|
||||||
|
|
||||||
protected static String normalizeObject(Object object) {
|
protected static String normalizeObject(Object object) {
|
||||||
|
|
|
@ -15,11 +15,11 @@ import static net.sourceforge.tuned.ui.TunedUtilities.*;
|
||||||
import java.awt.Component;
|
import java.awt.Component;
|
||||||
import java.awt.Dimension;
|
import java.awt.Dimension;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.AbstractMap.SimpleEntry;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
@ -144,21 +144,30 @@ class MovieHashMatcher implements AutoCompleteMatcher {
|
||||||
movieMatchFiles.addAll(filter(orphanedFiles, SUBTITLE_FILES)); // run movie detection only on orphaned subtitle files
|
movieMatchFiles.addAll(filter(orphanedFiles, SUBTITLE_FILES)); // run movie detection only on orphaned subtitle files
|
||||||
|
|
||||||
// match remaining movies file by file in parallel
|
// match remaining movies file by file in parallel
|
||||||
List<Future<Entry<File, Collection<Movie>>>> grabMovieJobs = new ArrayList<Future<Entry<File, Collection<Movie>>>>();
|
List<Future<Map<File, Collection<Movie>>>> grabMovieJobs = new ArrayList<Future<Map<File, Collection<Movie>>>>();
|
||||||
|
|
||||||
// process in parallel
|
// process in parallel
|
||||||
ExecutorService executor = Executors.newFixedThreadPool(getPreferredThreadPoolSize());
|
ExecutorService executor = Executors.newFixedThreadPool(getPreferredThreadPoolSize());
|
||||||
|
|
||||||
// map all files by movie
|
// map all files by movie
|
||||||
for (final File file : movieMatchFiles) {
|
List<File> remainingFiles = new ArrayList<File>();
|
||||||
if (movieByFile.containsKey(file))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
grabMovieJobs.add(executor.submit(new Callable<Entry<File, Collection<Movie>>>() {
|
for (File file : movieMatchFiles) {
|
||||||
|
if (!movieByFile.containsKey(file)) {
|
||||||
|
remainingFiles.add(file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (final Collection<File> folder : mapByFolder(remainingFiles).values()) {
|
||||||
|
grabMovieJobs.add(executor.submit(new Callable<Map<File, Collection<Movie>>>() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SimpleEntry<File, Collection<Movie>> call() throws Exception {
|
public Map<File, Collection<Movie>> call() throws Exception {
|
||||||
return new SimpleEntry<File, Collection<Movie>>(file, detectMovie(file, null, service, locale, false));
|
Map<File, Collection<Movie>> detection = new LinkedHashMap<File, Collection<Movie>>();
|
||||||
|
for (File f : folder) {
|
||||||
|
detection.put(f, detectMovie(f, null, service, locale, false));
|
||||||
|
}
|
||||||
|
return detection;
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
@ -169,12 +178,14 @@ class MovieHashMatcher implements AutoCompleteMatcher {
|
||||||
memory.put("selection", new TreeMap<String, String>(getLenientCollator(locale)));
|
memory.put("selection", new TreeMap<String, String>(getLenientCollator(locale)));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for (Future<Entry<File, Collection<Movie>>> it : grabMovieJobs) {
|
for (Future<Map<File, Collection<Movie>>> detection : grabMovieJobs) {
|
||||||
// auto-select movie or ask user
|
// auto-select movie or ask user
|
||||||
File movieFile = it.get().getKey();
|
for (Entry<File, Collection<Movie>> it : detection.get().entrySet()) {
|
||||||
Movie movie = grabMovieName(movieFile, it.get().getValue(), locale, autodetect, memory, parent);
|
File movieFile = it.getKey();
|
||||||
if (movie != null) {
|
Movie movie = grabMovieName(movieFile, it.getValue(), locale, autodetect, memory, parent);
|
||||||
movieByFile.put(movieFile, movie);
|
if (movie != null) {
|
||||||
|
movieByFile.put(movieFile, movie);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
|
Loading…
Reference in New Issue