* Major performance improvements for mass-movie matching by optimizing data caching

* Request caching didn't work in parallel searches because threads would always process similar files (in order) at the same time and therefore couldn't benefit from caching. The new logic uses one thread per folder, since all files that result in the same query are usually in the same folder.
This commit is contained in:
Reinhard Pointner 2012-07-24 20:01:48 +00:00
parent d33a907f2f
commit d143e3feb5
6 changed files with 75 additions and 56 deletions

View File

@ -31,7 +31,6 @@ import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeSet;
import java.util.WeakHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
@ -427,7 +426,6 @@ public class MediaDetection {
return null;
}
private static List<Entry<String, Movie>> movieIndex;
@ -531,7 +529,7 @@ public class MediaDetection {
final SimilarityMetric metric = new NameSimilarityMetric();
final Map<Movie, Float> probabilityMap = new LinkedHashMap<Movie, Float>();
for (String query : querySet) {
for (Movie movie : queryLookupService.searchMovie(query, locale)) {
for (Movie movie : queryLookupService.searchMovie(query.toLowerCase(), locale)) {
probabilityMap.put(movie, metric.getSimilarity(query, movie));
}
}
@ -660,7 +658,7 @@ public class MediaDetection {
private static final Collator collator = getLenientCollator(Locale.ENGLISH);
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new WeakHashMap<String, CollationKey[]>(65536));
private static final Map<String, CollationKey[]> transformCache = synchronizedMap(new HashMap<String, CollationKey[]>(65536));
public HighPerformanceMatcher(int maxStartIndex) {

View File

@ -29,7 +29,6 @@ import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;
import java.util.WeakHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
@ -102,10 +101,24 @@ public class ReleaseInfo {
return lastMatch;
}
// cached patterns
private Pattern[] strict_stopwords;
private Pattern[] strict_blacklist;
private Pattern[] nonstrict_stopwords;
private Pattern[] nonstrict_blacklist;
public List<String> cleanRelease(Collection<String> items, boolean strict) throws IOException {
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
Pattern[] stopwords;
Pattern[] blacklist;
// initialize cached patterns
synchronized (this) {
stopwords = strict ? strict_stopwords : nonstrict_stopwords;
blacklist = strict ? strict_blacklist : nonstrict_blacklist;
if (stopwords == null || blacklist == null) {
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
Pattern clutterBracket = getClutterBracketPattern(strict);
Pattern releaseGroup = getReleaseGroupPattern(strict);
Pattern languageSuffix = getLanguageSuffixPattern(languages);
@ -115,8 +128,19 @@ public class ReleaseInfo {
Pattern resolution = getResolutionPattern();
Pattern queryBlacklist = getBlacklistPattern();
Pattern[] stopwords = new Pattern[] { languageTag, videoSource, videoFormat, resolution, languageSuffix };
Pattern[] blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
stopwords = new Pattern[] { languageTag, videoSource, videoFormat, resolution, languageSuffix };
blacklist = new Pattern[] { clutterBracket, releaseGroup, languageTag, videoSource, videoFormat, resolution, languageSuffix, queryBlacklist };
// cache compiled patterns for common usage
if (strict) {
strict_stopwords = stopwords;
strict_blacklist = blacklist;
} else {
nonstrict_stopwords = stopwords;
nonstrict_blacklist = blacklist;
}
}
}
List<String> output = new ArrayList<String>(items.size());
for (String it : items) {
@ -334,17 +358,8 @@ public class ReleaseInfo {
return patterns;
}
private final Map<Set<Locale>, Map<String, Locale>> languageMapCache = synchronizedMap(new WeakHashMap<Set<Locale>, Map<String, Locale>>(2));
private Map<String, Locale> getLanguageMap(Locale... supportedDisplayLocale) {
// try cache
Set<Locale> displayLocales = new HashSet<Locale>(asList(supportedDisplayLocale));
Map<String, Locale> languageMap = languageMapCache.get(displayLocales);
if (languageMap != null) {
return languageMap;
}
// use maximum strength collator by default
Collator collator = Collator.getInstance(Locale.ROOT);
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
@ -352,7 +367,7 @@ public class ReleaseInfo {
@SuppressWarnings("unchecked")
Comparator<String> order = (Comparator) collator;
languageMap = new TreeMap<String, Locale>(order);
Map<String, Locale> languageMap = languageMap = new TreeMap<String, Locale>(order);
for (String code : Locale.getISOLanguages()) {
Locale locale = new Locale(code);
@ -360,7 +375,7 @@ public class ReleaseInfo {
languageMap.put(locale.getISO3Language(), locale);
// map display language names for given locales
for (Locale language : displayLocales) {
for (Locale language : new HashSet<Locale>(asList(supportedDisplayLocale))) {
// make sure language name is properly normalized so accents and whatever don't break the regex pattern syntax
String languageName = Normalizer.normalize(locale.getDisplayLanguage(language), Form.NFKD);
languageMap.put(languageName, locale);
@ -373,7 +388,6 @@ public class ReleaseInfo {
languageMap.remove("III");
Map<String, Locale> result = unmodifiableMap(languageMap);
languageMapCache.put(displayLocales, result);
return result;
}
}

View File

@ -7,9 +7,9 @@ import static java.util.Collections.*;
import java.text.CollationKey;
import java.text.Collator;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.WeakHashMap;
public class CommonSequenceMatcher {
@ -22,7 +22,6 @@ public class CommonSequenceMatcher {
return collator;
}
protected final Collator collator;
protected final int commonSequenceMaxStartIndex;
@ -81,8 +80,7 @@ public class CommonSequenceMatcher {
return getCollationKeys(sequence.split("\\s+"));
}
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new WeakHashMap<String, CollationKey>(256));
private final Map<String, CollationKey> collationKeyDictionary = synchronizedMap(new HashMap<String, CollationKey>(256));
protected CollationKey[] getCollationKeys(String[] words) {

View File

@ -7,13 +7,13 @@ import static java.util.Collections.*;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.WeakHashMap;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.web.Episode;
@ -77,9 +77,8 @@ public class EpisodeMatcher extends Matcher<File, Object> {
}
private final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
private final Map<File, Set<SxE>> transformCache = synchronizedMap(new WeakHashMap<File, Set<SxE>>(64, 4));
private final Map<File, Set<SxE>> transformCache = synchronizedMap(new HashMap<File, Set<SxE>>(64, 4));
private Set<SxE> parseEpisodeIdentifer(File file) {

View File

@ -10,8 +10,8 @@ import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.WeakHashMap;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.vfs.FileInfo;
@ -26,7 +26,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
// Match by season / episode numbers
SeasonEpisode(new SeasonEpisodeMetric() {
private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new WeakHashMap<Object, Collection<SxE>>(64, 4));
private final Map<Object, Collection<SxE>> transformCache = synchronizedMap(new HashMap<Object, Collection<SxE>>(64, 4));
@Override
@ -64,7 +64,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
// Match episode airdate
AirDate(new DateMetric() {
private final Map<Object, Date> transformCache = synchronizedMap(new WeakHashMap<Object, Date>(64, 4));
private final Map<Object, Date> transformCache = synchronizedMap(new HashMap<Object, Date>(64, 4));
@Override
@ -118,8 +118,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
EpisodeIdentifier(new MetricCascade(SeasonEpisode, AirDate)),
// Advanced episode <-> file matching
EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)),
EpisodeBalancer(new SimilarityMetric() {
EpisodeFunnel(new MetricCascade(SeasonEpisode, AirDate, Title)), EpisodeBalancer(new SimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
@ -301,7 +300,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
return metric.getSimilarity(o1, o2);
}
private static final Map<Object, String> transformCache = synchronizedMap(new WeakHashMap<Object, String>(64, 4));
private static final Map<Object, String> transformCache = synchronizedMap(new HashMap<Object, String>(64, 4));
protected static String normalizeObject(Object object) {

View File

@ -15,11 +15,11 @@ import static net.sourceforge.tuned.ui.TunedUtilities.*;
import java.awt.Component;
import java.awt.Dimension;
import java.io.File;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
@ -144,21 +144,30 @@ class MovieHashMatcher implements AutoCompleteMatcher {
movieMatchFiles.addAll(filter(orphanedFiles, SUBTITLE_FILES)); // run movie detection only on orphaned subtitle files
// match remaining movies file by file in parallel
List<Future<Entry<File, Collection<Movie>>>> grabMovieJobs = new ArrayList<Future<Entry<File, Collection<Movie>>>>();
List<Future<Map<File, Collection<Movie>>>> grabMovieJobs = new ArrayList<Future<Map<File, Collection<Movie>>>>();
// process in parallel
ExecutorService executor = Executors.newFixedThreadPool(getPreferredThreadPoolSize());
// map all files by movie
for (final File file : movieMatchFiles) {
if (movieByFile.containsKey(file))
continue;
List<File> remainingFiles = new ArrayList<File>();
grabMovieJobs.add(executor.submit(new Callable<Entry<File, Collection<Movie>>>() {
for (File file : movieMatchFiles) {
if (!movieByFile.containsKey(file)) {
remainingFiles.add(file);
}
}
for (final Collection<File> folder : mapByFolder(remainingFiles).values()) {
grabMovieJobs.add(executor.submit(new Callable<Map<File, Collection<Movie>>>() {
@Override
public SimpleEntry<File, Collection<Movie>> call() throws Exception {
return new SimpleEntry<File, Collection<Movie>>(file, detectMovie(file, null, service, locale, false));
public Map<File, Collection<Movie>> call() throws Exception {
Map<File, Collection<Movie>> detection = new LinkedHashMap<File, Collection<Movie>>();
for (File f : folder) {
detection.put(f, detectMovie(f, null, service, locale, false));
}
return detection;
}
}));
}
@ -169,14 +178,16 @@ class MovieHashMatcher implements AutoCompleteMatcher {
memory.put("selection", new TreeMap<String, String>(getLenientCollator(locale)));
try {
for (Future<Entry<File, Collection<Movie>>> it : grabMovieJobs) {
for (Future<Map<File, Collection<Movie>>> detection : grabMovieJobs) {
// auto-select movie or ask user
File movieFile = it.get().getKey();
Movie movie = grabMovieName(movieFile, it.get().getValue(), locale, autodetect, memory, parent);
for (Entry<File, Collection<Movie>> it : detection.get().entrySet()) {
File movieFile = it.getKey();
Movie movie = grabMovieName(movieFile, it.getValue(), locale, autodetect, memory, parent);
if (movie != null) {
movieByFile.put(movieFile, movie);
}
}
}
} finally {
executor.shutdownNow();
}