* major performance improvements for mass-movie matching by optimizing data caching
* caching of requests didn't work in parallel searches because threads would always hit similar files (in order) at the same time and therefore couldn't benefit from caching. The new logic uses one thread per folder, since all files resulting in the same query are usually in the same folder
This commit is contained in:
parent
d33a907f2f
commit
d143e3feb5
|
@ -31,7 +31,6 @@ import java.util.Map.Entry;
|
|||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.WeakHashMap;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
|
@ -427,7 +426,6 @@ public class MediaDetection {
|
|||
return null;
|
||||
}
|
||||
|
||||
|
||||
private static List<Entry<String, Movie>> movieIndex;
|
||||
|
||||
|
||||
|
@ -531,7 +529,7 @@ public class MediaDetection {
|
|||
final SimilarityMetric metric = new NameSimilarityMetric();
|
||||
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
|
||||
for (String query : querySet) {
|
||||
for (Movie movie : queryLookupService.searchMovie(query, locale)) {
|
||||
for (Movie movie : queryLookupService.searchMovie(query.toLowerCase(), locale)) {
|
||||
probabilityMap.put(movie, metric.getSimilarity(query, movie));
|
||||
}
|
||||
}
|
||||
|
@ -660,7 +658,7 @@ public class MediaDetection {
|
|||
|
||||
private static final Collator collator = getLenientCollator(Locale.ENGLISH);
|
||||
|
||||
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new WeakHashMap<String, CollationKey[]>(65536));
|
||||
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new HashMap<String, CollationKey[]>(65536));
|
||||
|
||||
|
||||
public HighPerformanceMatcher(int maxStartIndex) {
|
||||
|
|
|
@ -29,7 +29,6 @@ import java.util.Map;
|
|||
import java.util.Scanner;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.WeakHashMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
@ -102,21 +101,46 @@ public class ReleaseInfo {
|
|||
return lastMatch;
|
||||
}
|
||||
|
||||
// cached patterns
|
||||
private Pattern[] strict_stopwords;
|
||||
private Pattern[] strict_blacklist;
|
||||
private Pattern[] nonstrict_stopwords;
|
||||
private Pattern[] nonstrict_blacklist;
|
||||
|
||||
|
||||
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
|
||||
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
|
||||
Pattern[] stopwords;
|
||||
Pattern[] blacklist;
|
||||
|
||||
Pattern clutterBracket = getClutterBracketPattern(strict);
|
||||
Pattern releaseGroup = getReleaseGroupPattern(strict);
|
||||
Pattern languageSuffix = getLanguageSuffixPattern(languages);
|
||||
Pattern languageTag = getLanguageTagPattern(languages);
|
||||
Pattern videoSource = getVideoSourcePattern();
|
||||
Pattern videoFormat = getVideoFormatPattern();
|
||||
Pattern resolution = getResolutionPattern();
|
||||
Pattern queryBlacklist = getBlacklistPattern();
|
||||
// initialize cached patterns
|
||||
synchronized (this) {
|
||||
stopwords = strict ? strict_stopwords : nonstrict_stopwords;
|
||||
blacklist = strict ? strict_blacklist : nonstrict_blacklist;
|
||||
|
||||
Pattern[] stopwords = new Pattern[] { languageTag, videoSource, videoFormat, resolution, languageSuffix };
|
||||
Pattern[] blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
|
||||
if (stopwords == null || blacklist == null) {
|
||||
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
|
||||
Pattern clutterBracket = getClutterBracketPattern(strict);
|
||||
Pattern releaseGroup = getReleaseGroupPattern(strict);
|
||||
Pattern languageSuffix = getLanguageSuffixPattern(languages);
|
||||
Pattern languageTag = getLanguageTagPattern(languages);
|
||||
Pattern videoSource = getVideoSourcePattern();
|
||||
Pattern videoFormat = getVideoFormatPattern();
|
||||
Pattern resolution = getResolutionPattern();
|
||||
Pattern queryBlacklist = getBlacklistPattern();
|
||||
|
||||
stopwords = new Pattern[] { languageTag, videoSource, videoFormat, resolution, languageSuffix };
|
||||
blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
|
||||
|
||||
// cache compiled patterns for common usage
|
||||
if (strict) {
|
||||
strict_stopwords = stopwords;
|
||||
strict_blacklist = blacklist;
|
||||
} else {
|
||||
nonstrict_stopwords = stopwords;
|
||||
nonstrict_blacklist = blacklist;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<String> output = new ArrayList<String>(items.size());
|
||||
for (String it : items) {
|
||||
|
@ -334,17 +358,8 @@ public class ReleaseInfo {
|
|||
return patterns;
|
||||
}
|
||||
|
||||
private final Map<Set<Locale>, Map<String, Locale>> languageMapCache = synchronizedMap(new WeakHashMap<Set<Locale>, Map<String, Locale>>(2));
|
||||
|
||||
|
||||
private Map<String, Locale> getLanguageMap(Locale... supportedDisplayLocale) {
|
||||
// try cache
|
||||
Set<Locale> displayLocales = new HashSet<Locale>(asList(supportedDisplayLocale));
|
||||
Map<String, Locale> languageMap = languageMapCache.get(displayLocales);
|
||||
if (languageMap != null) {
|
||||
return languageMap;
|
||||
}
|
||||
|
||||
// use maximum strength collator by default
|
||||
Collator collator = Collator.getInstance(Locale.ROOT);
|
||||
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
|
||||
|
@ -352,7 +367,7 @@ public class ReleaseInfo {
|
|||
|
||||
@SuppressWarnings("unchecked")
|
||||
Comparator<String> order = (Comparator) collator;
|
||||
languageMap = new TreeMap<String, Locale>(order);
|
||||
Map<String, Locale> languageMap = languageMap = new TreeMap<String, Locale>(order);
|
||||
|
||||
for (String code : Locale.getISOLanguages()) {
|
||||
Locale locale = new Locale(code);
|
||||
|
@ -360,7 +375,7 @@ public class ReleaseInfo {
|
|||
languageMap.put(locale.getISO3Language(), locale);
|
||||
|
||||
// map display language names for given locales
|
||||
for (Locale language : displayLocales) {
|
||||
for (Locale language : new HashSet<Locale>(asList(supportedDisplayLocale))) {
|
||||
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
|
||||
String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD);
|
||||
languageMap.put(languageName, locale);
|
||||
|
@ -373,7 +388,6 @@ public class ReleaseInfo {
|
|||
languageMap.remove("III");
|
||||
|
||||
Map<String, Locale> result = unmodifiableMap(languageMap);
|
||||
languageMapCache.put(displayLocales, result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,9 +7,9 @@ import static java.util.Collections.*;
|
|||
|
||||
import java.text.CollationKey;
|
||||
import java.text.Collator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.WeakHashMap;
|
||||
|
||||
|
||||
public class CommonSequenceMatcher {
|
||||
|
@ -22,7 +22,6 @@ public class CommonSequenceMatcher {
|
|||
return collator;
|
||||
}
|
||||
|
||||
|
||||
protected final Collator collator;
|
||||
protected final int commonSequenceMaxStartIndex;
|
||||
|
||||
|
@ -81,8 +80,7 @@ public class CommonSequenceMatcher {
|
|||
return getCollationKeys(sequence.split("\\s+"));
|
||||
}
|
||||
|
||||
|
||||
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new WeakHashMap<String, CollationKey>(256));
|
||||
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new HashMap<String, CollationKey>(256));
|
||||
|
||||
|
||||
protected CollationKey[] getCollationKeys(String[] words) {
|
||||
|
|
|
@ -7,13 +7,13 @@ import static java.util.Collections.*;
|
|||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.WeakHashMap;
|
||||
|
||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||
import net.sourceforge.filebot.web.Episode;
|
||||
|
@ -77,9 +77,8 @@ public class EpisodeMatcher extends Matcher<File, Object> {
|
|||
|
||||
}
|
||||
|
||||
|
||||
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
|
||||
private final Map<File, Set<SxE>> transformCache = synchronizedMap(new WeakHashMap<File, Set<SxE>>(64, 4));
|
||||
private final Map<File, Set<SxE>> transformCache = synchronizedMap(new HashMap<File, Set<SxE>>(64, 4));
|
||||
|
||||
|
||||
private Set<SxE> parseEpisodeIdentifer(File file) {
|
||||
|
|
|
@ -10,8 +10,8 @@ import static net.sourceforge.tuned.FileUtilities.*;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.WeakHashMap;
|
||||
|
||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||
import net.sourceforge.filebot.vfs.FileInfo;
|
||||
|
@ -26,7 +26,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
// Match by season / episode numbers
|
||||
SeasonEpisode(new SeasonEpisodeMetric() {
|
||||
|
||||
private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new WeakHashMap<Object, Collection<SxE>>(64, 4));
|
||||
private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new HashMap<Object, Collection<SxE>>(64, 4));
|
||||
|
||||
|
||||
@Override
|
||||
|
@ -64,7 +64,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
// Match episode airdate
|
||||
AirDate(new DateMetric() {
|
||||
|
||||
private final Map<Object, Date> transformCache = synchronizedMap(new WeakHashMap<Object, Date>(64, 4));
|
||||
private final Map<Object, Date> transformCache = synchronizedMap(new HashMap<Object, Date>(64, 4));
|
||||
|
||||
|
||||
@Override
|
||||
|
@ -118,8 +118,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
EpisodeIdentifier(new MetricCascade(SeasonEpisode, AirDate)),
|
||||
|
||||
// Advanced episode <-> file matching
|
||||
EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)),
|
||||
EpisodeBalancer(new SimilarityMetric() {
|
||||
EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)), EpisodeBalancer(new SimilarityMetric() {
|
||||
|
||||
@Override
|
||||
public float getSimilarity(Object o1, Object o2) {
|
||||
|
@ -301,7 +300,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||
return metric.getSimilarity(o1, o2);
|
||||
}
|
||||
|
||||
private static final Map<Object, String> transformCache = synchronizedMap(new WeakHashMap<Object, String>(64, 4));
|
||||
private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4));
|
||||
|
||||
|
||||
protected static String normalizeObject(Object object) {
|
||||
|
|
|
@ -15,11 +15,11 @@ import static net.sourceforge.tuned.ui.TunedUtilities.*;
|
|||
import java.awt.Component;
|
||||
import java.awt.Dimension;
|
||||
import java.io.File;
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
@ -144,21 +144,30 @@ class MovieHashMatcher implements AutoCompleteMatcher {
|
|||
movieMatchFiles.addAll(filter(orphanedFiles, SUBTITLE_FILES)); // run movie detection only on orphaned subtitle files
|
||||
|
||||
// match remaining movies file by file in parallel
|
||||
List<Future<Entry<File, Collection<Movie>>>> grabMovieJobs = new ArrayList<Future<Entry<File, Collection<Movie>>>>();
|
||||
List<Future<Map<File, Collection<Movie>>>> grabMovieJobs = new ArrayList<Future<Map<File, Collection<Movie>>>>();
|
||||
|
||||
// process in parallel
|
||||
ExecutorService executor = Executors.newFixedThreadPool(getPreferredThreadPoolSize());
|
||||
|
||||
// map all files by movie
|
||||
for (final File file : movieMatchFiles) {
|
||||
if (movieByFile.containsKey(file))
|
||||
continue;
|
||||
List<File> remainingFiles = new ArrayList<File>();
|
||||
|
||||
grabMovieJobs.add(executor.submit(new Callable<Entry<File, Collection<Movie>>>() {
|
||||
for (File file : movieMatchFiles) {
|
||||
if (!movieByFile.containsKey(file)) {
|
||||
remainingFiles.add(file);
|
||||
}
|
||||
}
|
||||
|
||||
for (final Collection<File> folder : mapByFolder(remainingFiles).values()) {
|
||||
grabMovieJobs.add(executor.submit(new Callable<Map<File, Collection<Movie>>>() {
|
||||
|
||||
@Override
|
||||
public SimpleEntry<File, Collection<Movie>> call() throws Exception {
|
||||
return new SimpleEntry<File, Collection<Movie>>(file, detectMovie(file, null, service, locale, false));
|
||||
public Map<File, Collection<Movie>> call() throws Exception {
|
||||
Map<File, Collection<Movie>> detection = new LinkedHashMap<File, Collection<Movie>>();
|
||||
for (File f : folder) {
|
||||
detection.put(f, detectMovie(f, null, service, locale, false));
|
||||
}
|
||||
return detection;
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
@ -169,12 +178,14 @@ class MovieHashMatcher implements AutoCompleteMatcher {
|
|||
memory.put("selection", new TreeMap<String, String>(getLenientCollator(locale)));
|
||||
|
||||
try {
|
||||
for (Future<Entry<File, Collection<Movie>>> it : grabMovieJobs) {
|
||||
for (Future<Map<File, Collection<Movie>>> detection : grabMovieJobs) {
|
||||
// auto-select movie or ask user
|
||||
File movieFile = it.get().getKey();
|
||||
Movie movie = grabMovieName(movieFile, it.get().getValue(), locale, autodetect, memory, parent);
|
||||
if (movie != null) {
|
||||
movieByFile.put(movieFile, movie);
|
||||
for (Entry<File, Collection<Movie>> it : detection.get().entrySet()) {
|
||||
File movieFile = it.getKey();
|
||||
Movie movie = grabMovieName(movieFile, it.getValue(), locale, autodetect, memory, parent);
|
||||
if (movie != null) {
|
||||
movieByFile.put(movieFile, movie);
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
|
|
Loading…
Reference in New Issue