From d73934f09b8fb27805a7a6cb53dd4b21ab8f449d Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Fri, 8 Apr 2016 22:59:27 +0000 Subject: [PATCH] Refactor LocalSearch and AutoDetection worker threads --- source/net/filebot/WebServices.java | 68 ++++--------------- source/net/filebot/media/AutoDetection.java | 7 +- .../filebot/ui/rename/AutoDetectMatcher.java | 25 +++---- source/net/filebot/web/AnidbClient.java | 32 ++++----- source/net/filebot/web/LocalSearch.java | 38 ++++------- test/net/filebot/web/AnidbClientTest.java | 4 +- 6 files changed, 56 insertions(+), 118 deletions(-) diff --git a/source/net/filebot/WebServices.java b/source/net/filebot/WebServices.java index d33d50fe..3ec678eb 100644 --- a/source/net/filebot/WebServices.java +++ b/source/net/filebot/WebServices.java @@ -11,7 +11,6 @@ import static net.filebot.util.FileUtilities.*; import java.util.Collection; import java.util.List; import java.util.Locale; -import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -69,7 +68,7 @@ public final class WebServices { public static final ID3Lookup MediaInfoID3 = new ID3Lookup(); public static EpisodeListProvider[] getEpisodeListProviders() { - return new EpisodeListProvider[] { TheTVDB, TheMovieDB_TV, AniDB, TVmaze }; + return new EpisodeListProvider[] { TheTVDB, AniDB, TheMovieDB_TV, TVmaze }; } public static MovieIdentificationService[] getMovieIdentificationServices() { @@ -108,6 +107,7 @@ public final class WebServices { } public static final ExecutorService requestThreadPool = Executors.newCachedThreadPool(); + public static final ExecutorService workerThreadPool = Executors.newWorkStealingPool(getPreferredThreadPoolSize()); public static class TheTVDBClientWithLocalSearch extends TheTVDBClient { @@ -115,29 +115,10 @@ public final class WebServices { super(apikey); } - // index of local thetvdb data dump - private static LocalSearch localIndex; - - public synchronized LocalSearch getLocalIndex() throws Exception { - if (localIndex == null) { - // fetch data dump - SearchResult[] data = releaseInfo.getTheTVDBIndex(); - - // index data dump - localIndex = new LocalSearch(asList(data)) { - - @Override - protected Set getFields(SearchResult object) { - return set(object.getEffectiveNames()); - } - }; - - // make local search more restrictive - localIndex.setResultMinimumSimilarity(0.7f); - } - - return localIndex; - } + // local TheTVDB search index + private final Resource> localIndex = Resource.lazy(() -> { + return new LocalSearch(releaseInfo.getTheTVDBIndex(), SearchResult::getEffectiveNames); + }).memoize(); private SearchResult merge(SearchResult prime, List group) { int id = prime.getId(); @@ -150,7 +131,7 @@ public final class WebServices { public List fetchSearchResult(final String query, final Locale locale) throws Exception { // run local search and API search in parallel Future> apiSearch = requestThreadPool.submit(() -> TheTVDBClientWithLocalSearch.super.fetchSearchResult(query, locale)); - Future> localSearch = requestThreadPool.submit(() -> getLocalIndex().search(query)); + Future> localSearch = requestThreadPool.submit(() -> localIndex.get().search(query)); // combine alias names into a single search results, and keep API search name as primary name Collection result = StreamEx.of(apiSearch.get()).append(localSearch.get()).groupingBy(SearchResult::getId, collectingAndThen(toList(), group -> merge(group.get(0), group))).values(); @@ -166,8 +147,8 @@ public final class WebServices { } @Override - public List getAnimeTitles() throws Exception { - return asList(releaseInfo.getAnidbIndex()); + public SearchResult[] getAnimeTitles() throws Exception { + return releaseInfo.getAnidbIndex(); } } @@ -177,34 +158,15 @@ public final class WebServices { super(name, version); } - // index of local OpenSubtitles data dump - private static LocalSearch localIndex; - - public synchronized LocalSearch getLocalIndex() throws Exception { - if (localIndex == null) { - // fetch data dump - SubtitleSearchResult[] data = releaseInfo.getOpenSubtitlesIndex(); - - // index data dump - localIndex = new LocalSearch(asList(data)) { - - @Override - protected Set getFields(SubtitleSearchResult object) { - return set(object.getEffectiveNames()); - } - }; - } - - return localIndex; - } + // local OpenSubtitles search index + private final Resource> localIndex = Resource.lazy(() -> { + return new LocalSearch(releaseInfo.getOpenSubtitlesIndex(), SearchResult::getEffectiveNames); + }).memoize(); @Override - public synchronized List search(final String query) throws Exception { - List results = getLocalIndex().search(query); - - return sortBySimilarity(results, singleton(query), new MetricAvg(getSeriesMatchMetric(), getMovieMatchMetric())); + public List search(final String query) throws Exception { + return sortBySimilarity(localIndex.get().search(query), singleton(query), new MetricAvg(getSeriesMatchMetric(), getMovieMatchMetric())); } - } /** diff --git a/source/net/filebot/media/AutoDetection.java b/source/net/filebot/media/AutoDetection.java index 63448ab7..dec6dac5 100644 --- a/source/net/filebot/media/AutoDetection.java +++ b/source/net/filebot/media/AutoDetection.java @@ -27,8 +27,6 @@ import java.util.Objects; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.logging.Level; import java.util.regex.Pattern; import java.util.stream.Stream; @@ -122,9 +120,7 @@ public class AutoDetection { Map> groups = new TreeMap>(); // can't use parallel stream because default fork/join pool doesn't play well with the security manager - ExecutorService executor = Executors.newWorkStealingPool(); - - stream(files).collect(toMap(f -> f, f -> executor.submit(() -> detectGroup(f)))).forEach((file, group) -> { + stream(files).collect(toMap(f -> f, f -> workerThreadPool.submit(() -> detectGroup(f)))).forEach((file, group) -> { try { groups.computeIfAbsent(group.get(), k -> new TreeSet()).add(file); } catch (Exception e) { @@ -132,7 +128,6 @@ public class AutoDetection { } }); - executor.shutdown(); return groups; } diff --git a/source/net/filebot/ui/rename/AutoDetectMatcher.java b/source/net/filebot/ui/rename/AutoDetectMatcher.java index 7bdcb352..42912830 100644 --- a/source/net/filebot/ui/rename/AutoDetectMatcher.java +++ b/source/net/filebot/ui/rename/AutoDetectMatcher.java @@ -8,7 +8,6 @@ import static net.filebot.WebServices.*; import java.awt.Component; import java.io.File; -import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Locale; @@ -16,9 +15,9 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Objects; import java.util.Set; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.logging.Level; +import java.util.stream.Stream; import net.filebot.media.AutoDetection; import net.filebot.media.AutoDetection.Group; @@ -38,21 +37,19 @@ class AutoDetectMatcher implements AutoCompleteMatcher { Map> groups = new AutoDetection(files, false, locale).group(); // can't use parallel stream because default fork/join pool doesn't play well with the security manager - ExecutorService executor = Executors.newWorkStealingPool(); - List> result = new ArrayList>(); + Map>>> matches = groups.entrySet().stream().collect(toMap(Entry::getKey, it -> { + return workerThreadPool.submit(() -> match(it.getKey(), it.getValue(), strict, order, locale, autodetection, parent)); + })); - groups.entrySet().stream().collect(toMap(Entry::getKey, it -> { - return executor.submit(() -> match(it.getKey(), it.getValue(), strict, order, locale, autodetection, parent)); - })).forEach((group, matches) -> { + // collect results + return matches.entrySet().stream().flatMap(it -> { try { - result.addAll(matches.get()); + return it.getValue().get().stream(); } catch (Exception e) { - log.log(Level.WARNING, "Failed to process group: " + group, e); + log.log(Level.WARNING, "Failed to process group: %s" + it.getKey(), e); } - }); - - executor.shutdown(); - return result; + return Stream.empty(); + }).collect(toList()); } private List> match(Group group, Collection files, boolean strict, SortOrder order, Locale locale, boolean autodetection, Component parent) throws Exception { diff --git a/source/net/filebot/web/AnidbClient.java b/source/net/filebot/web/AnidbClient.java index 0d0eb041..cab01d16 100644 --- a/source/net/filebot/web/AnidbClient.java +++ b/source/net/filebot/web/AnidbClient.java @@ -21,7 +21,6 @@ import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -35,6 +34,7 @@ import org.w3c.dom.Node; import net.filebot.Cache; import net.filebot.CacheType; +import net.filebot.Resource; import net.filebot.ResourceManager; public class AnidbClient extends AbstractEpisodeListProvider { @@ -80,16 +80,14 @@ public class AnidbClient extends AbstractEpisodeListProvider { return fetchSearchResult(query, locale); } + // local AniDB search index + private final Resource> localIndex = Resource.lazy(() -> { + return new LocalSearch(getAnimeTitles(), SearchResult::getEffectiveNames); + }).memoize(); + @Override public List fetchSearchResult(String query, Locale locale) throws Exception { - LocalSearch index = new LocalSearch(getAnimeTitles()) { - - @Override - protected Set getFields(SearchResult it) { - return set(it.getEffectiveNames()); - } - }; - return new ArrayList(index.search(query)); + return localIndex.get().search(query); } @Override @@ -183,7 +181,7 @@ public class AnidbClient extends AbstractEpisodeListProvider { /** * This method is overridden in {@link net.filebot.WebServices.AnidbClientWithLocalSearch} to fetch the Anime Index from our own host and not anidb.net */ - public synchronized List getAnimeTitles() throws Exception { + public synchronized SearchResult[] getAnimeTitles() throws Exception { // get data file (unzip and cache) byte[] bytes = getCache("root").bytes("anime-titles.dat.gz", n -> new URL("http://anidb.net/api/" + n)).get(); @@ -230,24 +228,20 @@ public class AnidbClient extends AbstractEpisodeListProvider { } // build up a list of all possible AniDB search results - List anime = new ArrayList(entriesByAnime.size()); - - entriesByAnime.forEach((aid, triples) -> { - List names = triples.stream().sorted((a, b) -> { + return entriesByAnime.entrySet().stream().map(it -> { + List names = it.getValue().stream().sorted((a, b) -> { for (int i = 0; i < a.length; i++) { if (!a[i].equals(b[i])) { return ((Comparable) a[i]).compareTo(b[i]); } } return 0; - }).map(it -> (String) it[2]).collect(toList()); + }).map(n -> n[2].toString()).collect(toList()); String primaryTitle = names.get(0); List aliasNames = names.subList(1, names.size()); - anime.add(new SearchResult(aid, primaryTitle, aliasNames)); - }); - - return anime; + return new SearchResult(it.getKey(), primaryTitle, aliasNames); + }).toArray(SearchResult[]::new); } } diff --git a/source/net/filebot/web/LocalSearch.java b/source/net/filebot/web/LocalSearch.java index 3327d4b3..f24b200b 100644 --- a/source/net/filebot/web/LocalSearch.java +++ b/source/net/filebot/web/LocalSearch.java @@ -1,19 +1,19 @@ package net.filebot.web; -import static java.util.Collections.*; +import static java.util.Arrays.*; +import static java.util.Collections.reverseOrder; import static java.util.Comparator.*; import static java.util.stream.Collectors.*; import static net.filebot.similarity.Normalization.*; import java.util.AbstractMap.SimpleImmutableEntry; -import java.util.ArrayList; import java.util.Collection; -import java.util.HashSet; import java.util.List; import java.util.Map.Entry; import java.util.Objects; import java.util.Set; import java.util.concurrent.ExecutionException; +import java.util.function.Function; import java.util.stream.IntStream; import com.ibm.icu.text.Transliterator; @@ -29,20 +29,20 @@ public class LocalSearch { private Transliterator transliterator = Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove"); - private List objects; - private List> fields; + private T[] objects; + private Set[] fields; - public LocalSearch(Collection data) { - objects = new ArrayList(data); - fields = objects.stream().map(this::getFields).collect(toList()); + public LocalSearch(T[] data, Function> keywords) { + objects = data.clone(); + fields = stream(objects).map(keywords).map(this::normalize).toArray(Set[]::new); } public List search(String q) throws ExecutionException, InterruptedException { String query = normalize(q); - return IntStream.range(0, objects.size()).mapToObj(i -> { - T object = objects.get(i); - Set field = fields.get(i); + return IntStream.range(0, objects.length).mapToObj(i -> { + T object = objects[i]; + Set field = fields[i]; boolean match = field.stream().anyMatch(it -> it.contains(query)); double similarity = field.stream().mapToDouble(it -> metric.getSimilarity(query, it)).max().orElse(0); @@ -59,22 +59,12 @@ public class LocalSearch { this.resultSetSize = resultSetSize; } - protected Set getFields(T object) { - return set(singleton(object.toString())); - } - - protected Set set(Collection values) { - Set set = new HashSet(values.size()); - for (String value : values) { - if (value != null) { - set.add(normalize(value)); - } - } - return set; + protected Set normalize(Collection values) { + return values.stream().map(this::normalize).collect(toSet()); } protected String normalize(String value) { - // normalize separator, normalize case and trim + // normalize separator, trim and normalize case return normalizePunctuation(transliterator.transform(value)).toLowerCase(); } diff --git a/test/net/filebot/web/AnidbClientTest.java b/test/net/filebot/web/AnidbClientTest.java index c0b32cef..6c179ab8 100644 --- a/test/net/filebot/web/AnidbClientTest.java +++ b/test/net/filebot/web/AnidbClientTest.java @@ -28,8 +28,8 @@ public class AnidbClientTest { @Test public void getAnimeTitles() throws Exception { - List animeTitles = anidb.getAnimeTitles(); - assertTrue(animeTitles.size() > 8000); + SearchResult[] animeTitles = anidb.getAnimeTitles(); + assertTrue(animeTitles.length > 8000); } @Test