* replace TheTVDB online search with local search as best as possible to improve search result ranking

This commit is contained in:
Reinhard Pointner 2013-10-13 14:50:45 +00:00
parent 071ee0f1b0
commit 2c91a3be2e
9 changed files with 104 additions and 85 deletions

View File

@ -1,13 +1,11 @@
package net.sourceforge.filebot;
import static java.util.Arrays.*;
import static java.util.Collections.*;
import static net.sourceforge.filebot.Settings.*;
import static net.sourceforge.filebot.media.MediaDetection.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
@ -20,7 +18,6 @@ import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import net.sourceforge.filebot.media.MediaDetection;
import net.sourceforge.filebot.web.AcoustID;
import net.sourceforge.filebot.web.AnidbClient;
import net.sourceforge.filebot.web.AnidbSearchResult;
@ -41,7 +38,6 @@ import net.sourceforge.filebot.web.TheTVDBClient;
import net.sourceforge.filebot.web.TheTVDBSearchResult;
import net.sourceforge.filebot.web.VideoHashSubtitleService;
/**
* Reuse the same web service client so login, cache, etc. can be shared.
*/
@ -66,32 +62,26 @@ public final class WebServices {
public static final FanartTV FanartTV = new FanartTV(Settings.getApplicationProperty("fanart.tv.apikey"));
public static final AcoustID AcoustID = new AcoustID(Settings.getApplicationProperty("acoustid.apikey"));
/**
 * Lists the episode list providers exposed by this class.
 *
 * @return a newly allocated array containing TheTVDB, AniDB, TVRage and Serienjunkies, in that order
 */
public static EpisodeListProvider[] getEpisodeListProviders() {
	final EpisodeListProvider[] providers = { TheTVDB, AniDB, TVRage, Serienjunkies };
	return providers;
}
/**
 * Lists the movie identification services exposed by this class.
 *
 * @return a newly allocated array containing TMDb, IMDb and OpenSubtitles, in that order
 */
public static MovieIdentificationService[] getMovieIdentificationServices() {
	final MovieIdentificationService[] services = { TMDb, IMDb, OpenSubtitles };
	return services;
}
/**
 * Lists the subtitle providers exposed by this class.
 *
 * @return a newly allocated single-element array containing OpenSubtitles
 */
public static SubtitleProvider[] getSubtitleProviders() {
	final SubtitleProvider[] providers = { OpenSubtitles };
	return providers;
}
/**
 * Lists the services that can look up subtitles by video hash.
 *
 * @return a newly allocated single-element array containing OpenSubtitles
 */
public static VideoHashSubtitleService[] getVideoHashSubtitleServices() {
	final VideoHashSubtitleService[] services = { OpenSubtitles };
	return services;
}
/**
 * Lists the music identification services exposed by this class.
 *
 * @return a newly allocated array containing the shared AcoustID client followed by
 *         a freshly created ID3Lookup instance
 */
public static MusicIdentificationService[] getMusicIdentificationServices() {
	// a new ID3Lookup is created on every call, the AcoustID client is the shared constant
	final MusicIdentificationService[] services = { AcoustID, new ID3Lookup() };
	return services;
}
public static EpisodeListProvider getEpisodeListProvider(String name) {
for (EpisodeListProvider it : WebServices.getEpisodeListProviders()) {
if (it.getName().equalsIgnoreCase(name))
@ -101,7 +91,6 @@ public final class WebServices {
return null; // default
}
public static MovieIdentificationService getMovieIdentificationService(String name) {
for (MovieIdentificationService it : getMovieIdentificationServices()) {
if (it.getName().equalsIgnoreCase(name))
@ -111,7 +100,6 @@ public final class WebServices {
return null; // default
}
public static MusicIdentificationService getMusicIdentificationService(String name) {
for (MusicIdentificationService it : getMusicIdentificationServices()) {
if (it.getName().equalsIgnoreCase(name))
@ -121,7 +109,6 @@ public final class WebServices {
return null; // default
}
public static class TheTVDBClientWithLocalSearch extends TheTVDBClient {
public TheTVDBClientWithLocalSearch(String apikey) {
@ -131,18 +118,17 @@ public final class WebServices {
// index of local thetvdb data dump
private static LocalSearch<SearchResult> localIndex;
public synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException {
if (localIndex == null) {
// fetch data dump
TheTVDBSearchResult[] data = MediaDetection.releaseInfo.getTheTVDBIndex();
TheTVDBSearchResult[] data = releaseInfo.getTheTVDBIndex();
// index data dump
localIndex = new LocalSearch<SearchResult>(asList(data)) {
@Override
protected Set<String> getFields(SearchResult object) {
return set(object.getNames());
return set(object.getEffectiveNames());
}
};
@ -153,7 +139,6 @@ public final class WebServices {
return localIndex;
}
public SeriesInfo getSeriesInfoByLocalIndex(String name, Locale locale) throws Exception {
List<SearchResult> results = getLocalIndex().search(name);
if (results.size() > 0) {
@ -162,7 +147,6 @@ public final class WebServices {
return null;
}
@SuppressWarnings("unchecked")
@Override
public List<SearchResult> fetchSearchResult(final String query, final Locale locale) throws Exception {
@ -191,8 +175,7 @@ public final class WebServices {
ExecutorService executor = Executors.newFixedThreadPool(2);
try {
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
for (Future<List<SearchResult>> resultSet : executor.invokeAll(asList(apiSearch, localSearch))) {
for (Future<List<SearchResult>> resultSet : executor.invokeAll(asList(localSearch, apiSearch))) {
try {
results.addAll(resultSet.get());
} catch (ExecutionException e) {
@ -201,28 +184,25 @@ public final class WebServices {
}
}
}
return new ArrayList<SearchResult>(results);
return sortBySimilarity(results, singleton(query), getSeriesMatchMetric(), false);
} finally {
executor.shutdownNow();
}
};
}
/**
 * AniDB client variant whose anime title list is taken from the release info
 * index instead of whatever the superclass implementation would return.
 */
public static class AnidbClientWithLocalSearch extends AnidbClient {
/**
 * Passes the client identifier and client version straight through to the superclass.
 */
public AnidbClientWithLocalSearch(String client, int clientver) {
super(client, clientver);
}
/**
 * Returns the AniDB index provided by releaseInfo, wrapped as a list.
 */
@Override
public List<AnidbSearchResult> getAnimeTitles() throws Exception {
// NOTE(review): two return statements appear here because this span shows a diff
// (qualified vs. statically imported releaseInfo) -- presumably only the second one
// exists in the resulting file; confirm against the actual source
return asList(MediaDetection.releaseInfo.getAnidbIndex());
return asList(releaseInfo.getAnidbIndex());
}
}
/**
* Dummy constructor to prevent instantiation.
*/
@ -238,12 +218,10 @@ public final class WebServices {
OpenSubtitles.setUser(osdbLogin[0], osdbLogin[1]);
}
/**
 * Reads the login stored under the given key from the settings of this package.
 *
 * @param key settings key to look up
 * @return the stored value (or ":" if none is stored) split at the first ':' into at most two parts
 */
public static String[] getLogin(String key) {
	Settings packageSettings = Settings.forPackage(WebServices.class);
	// limit of 2 so that only the first ':' acts as separator
	return packageSettings.get(key, ":").split(":", 2);
}
public static void setLogin(String id, String user, String password) {
Settings settings = Settings.forPackage(WebServices.class);
String value = user.length() > 0 && password.length() > 0 ? user + ":" + password : null;

View File

@ -391,7 +391,7 @@ public class MediaDetection {
try {
for (SearchResult[] index : new SearchResult[][] { releaseInfo.getTheTVDBIndex(), releaseInfo.getAnidbIndex() }) {
for (SearchResult item : index) {
for (String name : item.getNames()) {
for (String name : item.getEffectiveNames()) {
seriesIndex.add(new SimpleEntry<String, SearchResult>(normalizePunctuation(name).toLowerCase(), item));
}
}
@ -541,7 +541,7 @@ public class MediaDetection {
// skip further queries if collected matches are already sufficient
if (options.size() > 0 && movieNameMatches.size() > 0) {
options.addAll(movieNameMatches);
return sortBySimilarity(options, terms);
return sortBySimilarity(options, terms, getMovieMatchMetric(), true);
}
// if matching name+year failed, try matching only by name
@ -590,7 +590,7 @@ public class MediaDetection {
options.addAll(movieNameMatches);
// sort by relevance
return sortBySimilarity(options, terms);
return sortBySimilarity(options, terms, getMovieMatchMetric(), true);
}
public static SimilarityMetric getMovieMatchMetric() {
@ -615,18 +615,43 @@ public class MediaDetection {
});
}
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms) throws IOException {
Collection<String> paragon = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
paragon.addAll(stripReleaseInfo(terms, true));
paragon.addAll(stripReleaseInfo(terms, false));
/**
 * Creates the similarity metric used for ranking series names.
 *
 * @return a new MetricAvg built from a default SequenceMatchSimilarity, a NameSimilarityMetric
 *         and a SequenceMatchSimilarity constructed with (0, true)
 */
public static SimilarityMetric getSeriesMatchMetric() {
	SimilarityMetric seriesMetric = new MetricAvg(new SequenceMatchSimilarity(), new NameSimilarityMetric(), new SequenceMatchSimilarity(0, true));
	return seriesMetric;
}
List<T> sorted = new ArrayList<T>(options);
sort(sorted, new SimilarityComparator(getMovieMatchMetric(), paragon.toArray()));
/**
 * Returns a copy of the given options ordered by a SimilarityComparator that compares
 * each option against the given terms using the given metric.
 *
 * @param options items to order; the collection itself is not modified, a copy is sorted
 * @param terms reference strings the options are compared against
 * @param metric similarity metric handed to the comparator
 * @param stripReleaseInfo if true, the terms are passed through stripReleaseInfo (both with
 *        true and with false as second argument) before comparison; if false, they are used as is
 * @return a new list holding the options in comparator order
 * @throws IOException declared by the method; presumably raised by stripReleaseInfo -- confirm
 */
public static <T> List<T> sortBySimilarity(Collection<T> options, Collection<String> terms, SimilarityMetric metric, boolean stripReleaseInfo) throws IOException {
// case-insensitive set, so terms differing only by case are kept once
Collection<String> paragon = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
// clean clutter tokens if required
if (stripReleaseInfo) {
paragon.addAll(stripReleaseInfo(terms, true));
paragon.addAll(stripReleaseInfo(terms, false));
} else {
paragon.addAll(terms);
}
// similarity comparator with multi-value support
SimilarityComparator comparator = new SimilarityComparator(metric, paragon.toArray()) {
@Override
public float getMaxSimilarity(Object obj) {
float f = 0;
// a SearchResult is scored by the best of its effective names, anything else by itself
Collection<?> names = obj instanceof SearchResult ? ((SearchResult) obj).getEffectiveNames() : singleton(obj);
for (Object it : names) {
f = Math.max(f, super.getMaxSimilarity(it));
}
return f;
}
};
// sort output array
List<T> result = new ArrayList<T>(options);
sort(result, comparator);
// DEBUG
// System.out.format("sortBySimilarity %s => %s", terms, sorted);
// System.out.format("sortBySimilarity %s => %s%n", terms, result);
// NOTE(review): "return sorted;" looks like the removed side of the diff shown in this
// span (no variable named sorted is declared in this method) -- confirm against the actual file
return sorted;
return result;
}
public static String reduceMovieName(String name, boolean strict) throws IOException {

View File

@ -200,7 +200,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
LinkedHashSet<String> set = new LinkedHashSet<String>(4);
set.add(removeTrailingBrackets(episode.getSeriesName()));
set.add(removeTrailingBrackets(episode.getTitle()));
for (String it : episode.getSeries().getNames()) {
for (String it : episode.getSeries().getEffectiveNames()) {
set.add(removeTrailingBrackets(it));
}

View File

@ -79,7 +79,7 @@ public class AnidbClient extends AbstractEpisodeListProvider {
@Override
protected Set<String> getFields(SearchResult it) {
return set(it.getNames());
return set(it.getEffectiveNames());
}
};

View File

@ -1,6 +1,8 @@
package net.sourceforge.filebot.web;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class Movie extends SearchResult {
@ -39,6 +41,16 @@ public class Movie extends SearchResult {
return tmdbId;
}
/**
 * Builds the display names of this movie: the primary name followed by every alias,
 * each formatted together with the year.
 *
 * @return a new mutable list with 1 + (number of aliases) entries, primary name first
 */
@Override
public List<String> getEffectiveNames() {
	final int aliasCount = aliasNames.length;
	List<String> effectiveNames = new ArrayList<String>(1 + aliasCount);

	// primary name always comes first
	effectiveNames.add(toString(name, year));

	// aliases follow in their stored order
	for (int i = 0; i < aliasCount; i++) {
		effectiveNames.add(toString(aliasNames[i], year));
	}
	return effectiveNames;
}
@Override
public boolean equals(Object object) {
if (object instanceof Movie) {
@ -67,6 +79,10 @@ public class Movie extends SearchResult {
/**
 * Returns the display form of this movie, i.e. its name followed by the year in parentheses.
 */
@Override
public String toString() {
	return toString(name, year);
}

/**
 * Formats a name and a year as "name (yyyy)".
 *
 * The year is zero-padded to at least four digits; negative years are rendered as 0000.
 * Formatting is done with Locale.ROOT so that the year is always written with ASCII digits,
 * independent of the default locale of the running JVM (String.format without a locale
 * localizes the digits of %d, which would make the result vary between machines).
 *
 * @param name name to display
 * @param year year to display; values below 0 are treated as 0
 * @return the formatted display string
 */
private static String toString(String name, int year) {
	return String.format(java.util.Locale.ROOT, "%s (%04d)", name, year < 0 ? 0 : year);
}

View File

@ -26,7 +26,7 @@ public abstract class SearchResult implements Serializable {
return aliasNames.clone();
}
public List<String> getNames() {
public List<String> getEffectiveNames() {
return new AbstractList<String>() {
@Override

View File

@ -65,7 +65,7 @@ public class SerienjunkiesClient extends AbstractEpisodeListProvider {
@Override
protected Set<String> getFields(SearchResult series) {
return set(series.getNames());
return set(series.getEffectiveNames());
}
};

View File

@ -28,8 +28,8 @@ public class SerienjunkiesClientTest {
assertEquals(34, series.getSeriesId());
assertEquals("Alias", series.getLink());
assertEquals("Alias - Die Agentin", series.getName());
assertEquals("Alias", series.getNames().get(1));
assertEquals("Alias - Die Agentin", series.getNames().get(0));
assertEquals("Alias", series.getEffectiveNames().get(1));
assertEquals("Alias - Die Agentin", series.getEffectiveNames().get(0));
assertEquals("2001-09-30", series.getStartDate().toString());
}

View File

@ -377,7 +377,7 @@ C-N.NTFS
C-N.NTFS.No
C-P-S
C-Subs
C0NFUSED
c0nFuSed
c0re
C1
C4DVD