* replace TheTVDB online search with local search as best as possible to improve search result ranking

This commit is contained in:
Reinhard Pointner 2013-10-13 14:50:45 +00:00
parent 071ee0f1b0
commit 2c91a3be2e
9 changed files with 104 additions and 85 deletions

View File

@ -1,13 +1,11 @@
package net.sourceforge.filebot;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static net.sourceforge.filebot.Settings.*;
import static net.sourceforge.filebot.media.MediaDetection.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
@ -20,7 +18,6 @@ import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import net.sourceforge.filebot.media.MediaDetection;
import net.sourceforge.filebot.web.AcoustID;
import net.sourceforge.filebot.web.AnidbClient;
import net.sourceforge.filebot.web.AnidbSearchResult;
@ -41,119 +38,107 @@ import net.sourceforge.filebot.web.TheTVDBClient;
import net.sourceforge.filebot.web.TheTVDBSearchResult;
import net.sourceforge.filebot.web.VideoHashSubtitleService;
/**
* Reuse the same web service client so login, cache, etc. can be shared.
*/
public final class WebServices {
// episode dbs
public static final TVRageClient TVRage = new TVRageClient();
// the client name is lower-cased with Locale.ROOT so it does not vary with the JVM's default locale
public static final AnidbClient AniDB = new AnidbClientWithLocalSearch(getApplicationName().toLowerCase(Locale.ROOT), 4);
public static final SerienjunkiesClient Serienjunkies = new SerienjunkiesClient(getApplicationProperty("serienjunkies.apikey"));
// extended TheTVDB module with local search
public static final TheTVDBClientWithLocalSearch TheTVDB = new TheTVDBClientWithLocalSearch(getApplicationProperty("thetvdb.apikey"));
// movie dbs
public static final IMDbClient IMDb = new IMDbClient();
public static final TMDbClient TMDb = new TMDbClient(getApplicationProperty("themoviedb.apikey"));
// subtitle dbs (the client is constructed with "<application name> <application version>")
public static final OpenSubtitlesClient OpenSubtitles = new OpenSubtitlesClient(String.format("%s %s", getApplicationName(), getApplicationVersion()));
// misc (application properties are read through the same static import as the clients above)
public static final FanartTV FanartTV = new FanartTV(getApplicationProperty("fanart.tv.apikey"));
public static final AcoustID AcoustID = new AcoustID(getApplicationProperty("acoustid.apikey"));
/**
 * Lists the shared episode list providers: TheTVDB, AniDB, TVRage and Serienjunkies, in that order.
 *
 * @return a newly allocated array on every call
 */
public static EpisodeListProvider[] getEpisodeListProviders() {
    EpisodeListProvider[] providers = { TheTVDB, AniDB, TVRage, Serienjunkies };
    return providers;
}
/**
 * Lists the shared movie identification services: TMDb, IMDb and OpenSubtitles, in that order.
 *
 * @return a newly allocated array on every call
 */
public static MovieIdentificationService[] getMovieIdentificationServices() {
    MovieIdentificationService[] services = { TMDb, IMDb, OpenSubtitles };
    return services;
}
/**
 * Lists the shared subtitle providers; currently only OpenSubtitles.
 *
 * @return a newly allocated single-element array on every call
 */
public static SubtitleProvider[] getSubtitleProviders() {
    SubtitleProvider[] providers = { OpenSubtitles };
    return providers;
}
/**
 * Lists the shared video hash subtitle services; currently only OpenSubtitles.
 *
 * @return a newly allocated single-element array on every call
 */
public static VideoHashSubtitleService[] getVideoHashSubtitleServices() {
    VideoHashSubtitleService[] services = { OpenSubtitles };
    return services;
}
/**
 * Lists the music identification services: the shared AcoustID client followed by an ID3Lookup.
 * The ID3Lookup is created anew on every call, unlike the shared AcoustID client.
 *
 * @return a newly allocated array on every call
 */
public static MusicIdentificationService[] getMusicIdentificationServices() {
    MusicIdentificationService[] services = { AcoustID, new ID3Lookup() };
    return services;
}
/**
 * Looks up an episode list provider by name, ignoring case.
 *
 * @param name provider name to look for
 * @return the first provider from getEpisodeListProviders() whose name matches, or null if none does
 */
public static EpisodeListProvider getEpisodeListProvider(String name) {
    EpisodeListProvider[] providers = getEpisodeListProviders();
    for (int i = 0; i < providers.length; i++) {
        EpisodeListProvider candidate = providers[i];
        if (candidate.getName().equalsIgnoreCase(name)) {
            return candidate;
        }
    }
    // no provider with that name
    return null;
}
/**
 * Looks up a movie identification service by name, ignoring case.
 *
 * @param name service name to look for
 * @return the first service from getMovieIdentificationServices() whose name matches, or null if none does
 */
public static MovieIdentificationService getMovieIdentificationService(String name) {
    MovieIdentificationService match = null;
    MovieIdentificationService[] services = getMovieIdentificationServices();
    // stop at the first match so that earlier entries take precedence
    for (int i = 0; match == null && i < services.length; i++) {
        if (services[i].getName().equalsIgnoreCase(name)) {
            match = services[i];
        }
    }
    return match;
}
/**
 * Looks up a music identification service by name, ignoring case.
 *
 * @param name service name to look for
 * @return the first service from getMusicIdentificationServices() whose name matches, or null if none does
 */
public static MusicIdentificationService getMusicIdentificationService(String name) {
    for (MusicIdentificationService service : getMusicIdentificationServices()) {
        boolean sameName = service.getName().equalsIgnoreCase(name);
        if (!sameName) {
            continue;
        }
        return service;
    }
    // no service with that name
    return null;
}
public static class TheTVDBClientWithLocalSearch extends TheTVDBClient {
/**
 * Creates the client by handing the given API key to the {@code TheTVDBClient} constructor.
 *
 * @param apikey API key, forwarded unchanged to the superclass
 */
public TheTVDBClientWithLocalSearch(String apikey) {
super(apikey);
}
// index of local thetvdb data dump
private static LocalSearch<SearchResult> localIndex;
/**
 * Returns the local search index, building it on the first call.
 * <p>
 * The index is stored in the static {@code localIndex} field; once that field is set, later calls return it as-is.
 * <p>
 * NOTE(review): the method is {@code synchronized} on the instance while {@code localIndex} is static, so calls on
 * two different client instances would not exclude each other -- confirm that only a single instance is ever used.
 *
 * @return the shared local index
 * @throws IOException declared on the signature; presumably raised while reading the data dump -- TODO confirm
 */
public synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException {
if (localIndex == null) {
// fetch data dump
// NOTE(review): the next two lines declare the same variable, once with and once without the MediaDetection
// qualifier; this looks like the old and the new version of one changed line shown together -- confirm against the file
TheTVDBSearchResult[] data = MediaDetection.releaseInfo.getTheTVDBIndex();
TheTVDBSearchResult[] data = releaseInfo.getTheTVDBIndex();
// index data dump
localIndex = new LocalSearch<SearchResult>(asList(data)) {
@Override
protected Set<String> getFields(SearchResult object) {
// NOTE(review): two return statements in a row; presumably getNames() is the old and getEffectiveNames() the new
// version of the same line -- confirm against the file
return set(object.getNames());
return set(object.getEffectiveNames());
}
};
// make local search more restrictive
localIndex.setResultMinimumSimilarity(0.7f);
}
return localIndex;
}
public SeriesInfo getSeriesInfoByLocalIndex(String name, Locale locale) throws Exception {
List<SearchResult> results = getLocalIndex().search(name);
if (results.size() > 0) {
@ -161,20 +146,19 @@ public final class WebServices {
}
return null;
}
@SuppressWarnings("unchecked")
@Override
public List<SearchResult> fetchSearchResult(final String query, final Locale locale) throws Exception {
Callable<List<SearchResult>> apiSearch = new Callable<List<SearchResult>>() {
@Override
public List<SearchResult> call() throws Exception {
return TheTVDBClientWithLocalSearch.super.fetchSearchResult(query, locale);
}
};
Callable<List<SearchResult>> localSearch = new Callable<List<SearchResult>>() {
@Override
public List<SearchResult> call() throws Exception {
try {
@ -182,17 +166,16 @@ public final class WebServices {
} catch (Exception e) {
Logger.getLogger(TheTVDBClientWithLocalSearch.class.getName()).log(Level.SEVERE, e.getMessage(), e);
}
// let local search fail gracefully without affecting API search
return emptyList();
}
};
ExecutorService executor = Executors.newFixedThreadPool(2);
try {
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
for (Future<List<SearchResult>> resultSet : executor.invokeAll(asList(apiSearch, localSearch))) {
for (Future<List<SearchResult>> resultSet : executor.invokeAll(asList(localSearch, apiSearch))) {
try {
results.addAll(resultSet.get());
} catch (ExecutionException e) {
@ -201,35 +184,32 @@ public final class WebServices {
}
}
}
return new ArrayList<SearchResult>(results);
return sortBySimilarity(results, singleton(query), getSeriesMatchMetric(), false);
} finally {
executor.shutdownNow();
}
};
}
/**
 * AniDB client whose anime title list is taken from {@code releaseInfo.getAnidbIndex()}
 * instead of whatever {@code AnidbClient.getAnimeTitles()} does by default.
 */
public static class AnidbClientWithLocalSearch extends AnidbClient {
/**
 * @param client passed through to the {@code AnidbClient} constructor
 * @param clientver passed through to the {@code AnidbClient} constructor
 */
public AnidbClientWithLocalSearch(String client, int clientver) {
super(client, clientver);
}
/**
 * Returns the AniDB index from {@code releaseInfo} as a list view over the returned array.
 */
@Override
public List<AnidbSearchResult> getAnimeTitles() throws Exception {
// NOTE(review): two return statements in a row; presumably the qualified call is the old and the statically
// imported one the new version of the same line -- confirm against the file
return asList(MediaDetection.releaseInfo.getAnidbIndex());
return asList(releaseInfo.getAnidbIndex());
}
}
/**
 * Private constructor that prevents instantiation; the class only exposes static members.
 *
 * @throws UnsupportedOperationException always, even when invoked from within this class
 */
private WebServices() {
throw new UnsupportedOperationException();
}
/**
* Initialize client settings from system properties
*/
@ -237,13 +217,11 @@ public final class WebServices {
String[] osdbLogin = getLogin("osdb.user");
OpenSubtitles.setUser(osdbLogin[0], osdbLogin[1]);
}
/**
 * Reads the login stored under the given key in the settings for this class's package.
 * <p>
 * The stored value is split at the first ':' only (limit 2), so any further ':' stays in the second element.
 * The ":" passed to {@code get} is presumably the fallback for a missing entry, which splits into two empty strings.
 *
 * @param key settings key, e.g. "osdb.user"
 * @return the parts of the stored value before and after the first ':'
 */
public static String[] getLogin(String key) {
    Settings settings = Settings.forPackage(WebServices.class);
    String stored = settings.get(key, ":");
    return stored.split(":", 2);
}
public static void setLogin(String id, String user, String password) {
Settings settings = Settings.forPackage(WebServices.class);
String value = user.length() > 0 && password.length() > 0 ? user + ":" + password : null;
@ -251,7 +229,7 @@ public final class WebServices {
user = "";
password = "";
}
if (id.equals("osdb.user")) {
settings.put(id, value);
OpenSubtitles.setUser(user, password);
@ -259,5 +237,5 @@ public final class WebServices {
throw new IllegalArgumentException();
}
}
}

View File

@ -391,7 +391,7 @@ public class MediaDetection {
try {
for (SearchResult[] index : new SearchResult[][] { releaseInfo.getTheTVDBIndex(), releaseInfo.getAnidbIndex() }) {
for (SearchResult item : index) {
for (String name : item.getNames()) {
for (String name : item.getEffectiveNames()) {
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(name).toLowerCase(), item));
}
}
@ -541,7 +541,7 @@ public class MediaDetection {
// skip further queries if collected matches are already sufficient
if (options.size() > 0 && movieNameMatches.size() > 0) {
options.addAll(movieNameMatches);
return sortBySimilarity(options, terms);
return sortBySimilarity(options, terms, getMovieMatchMetric(), true);
}
// if matching name+year failed, try matching only by name
@ -590,7 +590,7 @@ public class MediaDetection {
options.addAll(movieNameMatches);
// sort by relevance
return sortBySimilarity(options, terms);
return sortBySimilarity(options, terms, getMovieMatchMetric(), true);
}
public static SimilarityMetric getMovieMatchMetric() {
@ -615,18 +615,43 @@ public class MediaDetection {
});
}
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms) throws IOException {
Collection<String> paragon = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
paragon.addAll(stripReleaseInfo(terms, true));
paragon.addAll(stripReleaseInfo(terms, false));
/**
 * Builds the metric used to rank series search results: a {@code MetricAvg} over a default
 * {@code SequenceMatchSimilarity}, a {@code NameSimilarityMetric} and a {@code SequenceMatchSimilarity(0, true)}.
 *
 * @return a new metric instance on every call
 */
public static SimilarityMetric getSeriesMatchMetric() {
    SequenceMatchSimilarity sequenceMatch = new SequenceMatchSimilarity();
    NameSimilarityMetric nameMatch = new NameSimilarityMetric();
    SequenceMatchSimilarity configuredSequenceMatch = new SequenceMatchSimilarity(0, true);
    return new MetricAvg(sequenceMatch, nameMatch, configuredSequenceMatch);
}
List<T> sorted = new ArrayList<T>(options);
sort(sorted, new SimilarityComparator(getMovieMatchMetric(), paragon.toArray()));
/**
 * Sorts the given options with a {@code SimilarityComparator} built from the given metric and search terms.
 * <p>
 * Options that are {@code SearchResult} instances are scored by the best-matching of their effective names;
 * any other option is scored as a single value. The sort direction is whatever {@code SimilarityComparator}
 * defines -- presumably most similar first, TODO confirm.
 *
 * @param options candidates to sort; copied into a new list, the given collection is not modified
 * @param terms search terms the candidates are compared against
 * @param metric similarity metric handed to the comparator
 * @param stripReleaseInfo if true, the terms are replaced by the results of {@code stripReleaseInfo(terms, true)}
 *        and {@code stripReleaseInfo(terms, false)}; if false, the terms are used as given
 * @return a new list holding the options in comparator order
 * @throws IOException declared on the signature; presumably raised by the release info clean-up -- TODO confirm
 */
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms, SimilarityMetric metric, boolean stripReleaseInfo) throws IOException {
// case-insensitive set: terms that differ only in case are kept once
Collection<String> paragon = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
// clean clutter tokens if required
if (stripReleaseInfo) {
paragon.addAll(stripReleaseInfo(terms, true));
paragon.addAll(stripReleaseInfo(terms, false));
} else {
paragon.addAll(terms);
}
// similarity comparator with multi-value support
SimilarityComparator comparator = new SimilarityComparator(metric, paragon.toArray()) {
@Override
public float getMaxSimilarity(Object obj) {
// starts at 0, so the returned value is never below 0
float f = 0;
// a SearchResult contributes all of its effective names, anything else only itself
Collection<?> names = obj instanceof SearchResult ? ((SearchResult) obj).getEffectiveNames() : singleton(obj);
for (Object it : names) {
f = Math.max(f, super.getMaxSimilarity(it));
}
return f;
}
};
// sort output array
List<T> result = new ArrayList<T>(options);
sort(result, comparator);
// DEBUG
// System.out.format("sortBySimilarity %s => %s", terms, sorted);
// System.out.format("sortBySimilarity %s => %s%n", terms, result);
// NOTE(review): `sorted` is not declared in this method; `return sorted;` looks like the old version of the
// `return result;` line shown next to it -- confirm against the file
return sorted;
return result;
}
public static String reduceMovieName(String name, boolean strict) throws IOException {

View File

@ -200,7 +200,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
LinkedHashSet<String> set = new LinkedHashSet<String>(4);
set.add(removeTrailingBrackets(episode.getSeriesName()));
set.add(removeTrailingBrackets(episode.getTitle()));
for (String it : episode.getSeries().getNames()) {
for (String it : episode.getSeries().getEffectiveNames()) {
set.add(removeTrailingBrackets(it));
}

View File

@ -79,7 +79,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
// Supplies the names of the given search result, passed through set(...), as the values for getFields.
// NOTE(review): two return statements in a row; presumably getNames() is the old and getEffectiveNames() the new
// version of the same line -- confirm against the file
@Override
protected Set<String> getFields(SearchResult it) {
return set(it.getNames());
return set(it.getEffectiveNames());
}
};

View File

@ -1,6 +1,8 @@
package net.sourceforge.filebot.web;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class Movie extends SearchResult {
@ -39,6 +41,16 @@ public class Movie extends SearchResult {
return tmdbId;
}
/**
 * Returns the movie name followed by every alias name, each formatted together with the year
 * by {@code toString(String, int)}.
 *
 * @return a new list with one entry for the name plus one per alias, in alias order
 */
@Override
public List<String> getEffectiveNames() {
    final int aliasCount = aliasNames.length;
    List<String> effectiveNames = new ArrayList<String>(aliasCount + 1);
    // primary name first
    effectiveNames.add(toString(name, year));
    for (int i = 0; i < aliasCount; i++) {
        effectiveNames.add(toString(aliasNames[i], year));
    }
    return effectiveNames;
}
@Override
public boolean equals(Object object) {
if (object instanceof Movie) {
@ -67,6 +79,10 @@ public class Movie extends SearchResult {
/**
 * Returns this movie's name and year as formatted by {@code toString(String, int)}.
 */
@Override
public String toString() {
return toString(name, year);
}
/**
 * Formats a name and a year as {@code "name (yyyy)"}.
 * <p>
 * The year is zero-padded to at least four digits; a negative year is written as {@code 0000}.
 *
 * @param name movie or alias name; {@code null} is rendered as "null" by the formatter
 * @param year release year; negative values are treated as 0
 * @return the formatted string, e.g. "Avatar (2009)"
 */
private static String toString(String name, int year) {
    int displayYear = year < 0 ? 0 : year;
    // Locale.ROOT keeps the year in ASCII digits; the default locale may use other digits (e.g. Thai)
    return String.format(java.util.Locale.ROOT, "%s (%04d)", name, displayYear);
}

View File

@ -26,7 +26,7 @@ public abstract class SearchResult implements Serializable {
return aliasNames.clone();
}
public List<String> getNames() {
public List<String> getEffectiveNames() {
return new AbstractList<String>() {
@Override

View File

@ -65,7 +65,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
// Supplies the names of the given series, passed through set(...), as the values for getFields.
// NOTE(review): two return statements in a row; presumably getNames() is the old and getEffectiveNames() the new
// version of the same line -- confirm against the file
@Override
protected Set<String> getFields(SearchResult series) {
return set(series.getNames());
return set(series.getEffectiveNames());
}
};

View File

@ -28,8 +28,8 @@ public class SerienjunkiesClientTest {
assertEquals(34, series.getSeriesId());
assertEquals("Alias", series.getLink());
assertEquals("Alias - Die Agentin", series.getName());
assertEquals("Alias", series.getNames().get(1));
assertEquals("Alias - Die Agentin", series.getNames().get(0));
assertEquals("Alias", series.getEffectiveNames().get(1));
assertEquals("Alias - Die Agentin", series.getEffectiveNames().get(0));
assertEquals("2001-09-30", series.getStartDate().toString());
}

View File

@ -377,7 +377,7 @@ C-N.NTFS
C-N.NTFS.No
C-P-S
C-Subs
C0NFUSED
c0nFuSed
c0re
C1
C4DVD