+ TheTVDB: extend API search with LocalSearch from cached database index
This commit is contained in:
parent
ac958bd7d3
commit
caafbca373
|
@ -44,8 +44,9 @@ sortRegexList("website/data/series-mappings.txt")
|
||||||
// ------------------------------------------------------------------------- //
|
// ------------------------------------------------------------------------- //
|
||||||
|
|
||||||
|
|
||||||
def series_out = new File("website/data/series.list.gz")
|
def series_out = new File("website/data/series.list.gz")
|
||||||
def movies_out = new File("website/data/movies.txt.gz")
|
def movies_out = new File("website/data/movies.txt.gz")
|
||||||
|
def thetvdb_out = new File("website/data/thetvdb.txt.gz")
|
||||||
|
|
||||||
def gz(file, lines) {
|
def gz(file, lines) {
|
||||||
file.withOutputStream{ out ->
|
file.withOutputStream{ out ->
|
||||||
|
@ -114,15 +115,23 @@ println "Movie Count: " + movies.size()
|
||||||
|
|
||||||
// ------------------------------------------------------------------------- //
|
// ------------------------------------------------------------------------- //
|
||||||
|
|
||||||
|
// BUILD thetvdb.txt.gz (one line per English series: zero-padded id <TAB> series name)
def thetvdb_index_url = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')

// scrape the series list table; keep only 3-column rows that are marked 'English' and have a non-empty link text
// each result is a pair [text of 3rd column, link text of 1st column] (the Java index reader parses the former as the numeric series id)
def thetvdb_index = thetvdb_index_url.fetch().getHtml('UTF-8')
	.depthFirst().TABLE.find{it['@id'] == "listtable"}
	.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English' && it.TD[0].A.text() }
	.findResults{ [it.TD[2].text(), it.TD[0].A.text()] }

// sanity check BEFORE writing, so that a failed scrape never overwrites the published index with an empty file
if (thetvdb_index.size() == 0) throw new Exception("Failed to scrape TheTVDB index")

// join and sort
def thetvdb = thetvdb_index.findResults{ [it[0].pad(6), it[1]].join('\t') }.sort()
gz(thetvdb_out, thetvdb)
println "TheTVDB Index: " + thetvdb.size()
|
||||||
|
|
||||||
|
|
||||||
// BUILD series.list.gz
|
// BUILD series.list.gz
|
||||||
|
|
||||||
// TheTVDB
|
// TheTVDB
|
||||||
def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')
|
def thetvdb_names = thetvdb_index.findResults{ it[1] }
|
||||||
def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8')
|
|
||||||
.depthFirst().TABLE.find{it['@id'] == "listtable"}
|
|
||||||
.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'}
|
|
||||||
.findResults{ it.TD[0].A.text() }
|
|
||||||
|
|
||||||
// AniDB
|
// AniDB
|
||||||
def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten()
|
def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten()
|
||||||
|
@ -141,7 +150,7 @@ dokuwiki_index.getText('UTF-8').eachLine{
|
||||||
|
|
||||||
def names = [thetvdb_names, anidb_names]
|
def names = [thetvdb_names, anidb_names]
|
||||||
names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check
|
names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check
|
||||||
names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names
|
names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it).toLowerCase() } // collect and normalize names
|
||||||
|
|
||||||
def seriesSorter = new TreeSet(String.CASE_INSENSITIVE_ORDER)
|
def seriesSorter = new TreeSet(String.CASE_INSENSITIVE_ORDER)
|
||||||
seriesSorter.addAll(names)
|
seriesSorter.addAll(names)
|
||||||
|
|
|
@ -2,14 +2,33 @@
|
||||||
package net.sourceforge.filebot;
|
package net.sourceforge.filebot;
|
||||||
|
|
||||||
|
|
||||||
|
import static java.util.Arrays.*;
|
||||||
|
import static java.util.Collections.*;
|
||||||
import static net.sourceforge.filebot.Settings.*;
|
import static net.sourceforge.filebot.Settings.*;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
|
||||||
|
import net.sourceforge.filebot.media.MediaDetection;
|
||||||
import net.sourceforge.filebot.web.AnidbClient;
|
import net.sourceforge.filebot.web.AnidbClient;
|
||||||
import net.sourceforge.filebot.web.EpisodeListProvider;
|
import net.sourceforge.filebot.web.EpisodeListProvider;
|
||||||
import net.sourceforge.filebot.web.FanartTV;
|
import net.sourceforge.filebot.web.FanartTV;
|
||||||
import net.sourceforge.filebot.web.IMDbClient;
|
import net.sourceforge.filebot.web.IMDbClient;
|
||||||
|
import net.sourceforge.filebot.web.LocalSearch;
|
||||||
import net.sourceforge.filebot.web.MovieIdentificationService;
|
import net.sourceforge.filebot.web.MovieIdentificationService;
|
||||||
import net.sourceforge.filebot.web.OpenSubtitlesClient;
|
import net.sourceforge.filebot.web.OpenSubtitlesClient;
|
||||||
|
import net.sourceforge.filebot.web.SearchResult;
|
||||||
import net.sourceforge.filebot.web.SerienjunkiesClient;
|
import net.sourceforge.filebot.web.SerienjunkiesClient;
|
||||||
import net.sourceforge.filebot.web.SublightSubtitleClient;
|
import net.sourceforge.filebot.web.SublightSubtitleClient;
|
||||||
import net.sourceforge.filebot.web.SubsceneSubtitleClient;
|
import net.sourceforge.filebot.web.SubsceneSubtitleClient;
|
||||||
|
@ -28,9 +47,11 @@ public final class WebServices {
|
||||||
// episode dbs
|
// episode dbs
|
||||||
public static final TVRageClient TVRage = new TVRageClient();
|
public static final TVRageClient TVRage = new TVRageClient();
|
||||||
public static final AnidbClient AniDB = new AnidbClient(getApplicationName().toLowerCase(), 3);
|
public static final AnidbClient AniDB = new AnidbClient(getApplicationName().toLowerCase(), 3);
|
||||||
public static final TheTVDBClient TheTVDB = new TheTVDBClient(getApplicationProperty("thetvdb.apikey"));
|
|
||||||
public static final SerienjunkiesClient Serienjunkies = new SerienjunkiesClient(getApplicationProperty("serienjunkies.apikey"));
|
public static final SerienjunkiesClient Serienjunkies = new SerienjunkiesClient(getApplicationProperty("serienjunkies.apikey"));
|
||||||
|
|
||||||
|
// extended TheTVDB module with local search
|
||||||
|
public static final TheTVDBClient TheTVDB = new TheTVDBClientWithLocalSearch(getApplicationProperty("thetvdb.apikey"));
|
||||||
|
|
||||||
// movie dbs
|
// movie dbs
|
||||||
public static final IMDbClient IMDb = new IMDbClient();
|
public static final IMDbClient IMDb = new IMDbClient();
|
||||||
public static final TMDbClient TMDb = new TMDbClient(getApplicationProperty("themoviedb.apikey"));
|
public static final TMDbClient TMDb = new TMDbClient(getApplicationProperty("themoviedb.apikey"));
|
||||||
|
@ -84,6 +105,81 @@ public final class WebServices {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static class TheTVDBClientWithLocalSearch extends TheTVDBClient {
|
||||||
|
|
||||||
|
public TheTVDBClientWithLocalSearch(String apikey) {
|
||||||
|
super(apikey);
|
||||||
|
}
|
||||||
|
|
||||||
|
// index of local thetvdb data dump
|
||||||
|
private static LocalSearch<SearchResult> localIndex;
|
||||||
|
|
||||||
|
|
||||||
|
private synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException {
|
||||||
|
if (localIndex == null) {
|
||||||
|
// fetch data dump
|
||||||
|
TheTVDBSearchResult[] data = MediaDetection.releaseInfo.getTheTVDBIndex();
|
||||||
|
|
||||||
|
// index data dump
|
||||||
|
localIndex = new LocalSearch<SearchResult>(asList(data)) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Set<String> getFields(SearchResult object) {
|
||||||
|
return set(object.getName());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return localIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
@Override
|
||||||
|
public List<SearchResult> fetchSearchResult(final String query, final Locale locale) throws Exception {
|
||||||
|
Callable<List<SearchResult>> apiSearch = new Callable<List<SearchResult>>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<SearchResult> call() throws Exception {
|
||||||
|
return TheTVDBClientWithLocalSearch.super.fetchSearchResult(query, locale);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Callable<List<SearchResult>> localSearch = new Callable<List<SearchResult>>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<SearchResult> call() throws Exception {
|
||||||
|
try {
|
||||||
|
return getLocalIndex().search(query);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Logger.getLogger(TheTVDBClientWithLocalSearch.class.getName()).log(Level.SEVERE, e.getMessage(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// let local search fail gracefully without affecting API search
|
||||||
|
return emptyList();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ExecutorService executor = Executors.newFixedThreadPool(2);
|
||||||
|
try {
|
||||||
|
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
|
||||||
|
|
||||||
|
for (Future<List<SearchResult>> resultSet : executor.invokeAll(asList(localSearch, apiSearch))) {
|
||||||
|
try {
|
||||||
|
results.addAll(resultSet.get());
|
||||||
|
} catch (ExecutionException e) {
|
||||||
|
if (e.getCause() instanceof Exception) {
|
||||||
|
throw (Exception) e.getCause(); // unwrap cause
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ArrayList<SearchResult>(results);
|
||||||
|
} finally {
|
||||||
|
executor.shutdownNow();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dummy constructor to prevent instantiation.
|
* Dummy constructor to prevent instantiation.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -37,6 +37,7 @@ import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
import net.sourceforge.filebot.web.CachedResource;
|
import net.sourceforge.filebot.web.CachedResource;
|
||||||
import net.sourceforge.filebot.web.Movie;
|
import net.sourceforge.filebot.web.Movie;
|
||||||
|
import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
|
||||||
import net.sourceforge.tuned.ByteBufferInputStream;
|
import net.sourceforge.tuned.ByteBufferInputStream;
|
||||||
|
|
||||||
|
|
||||||
|
@ -242,6 +243,11 @@ public class ReleaseInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public TheTVDBSearchResult[] getTheTVDBIndex() throws IOException {
|
||||||
|
return theTVDBIndexResource.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public Map<Pattern, String> getSeriesDirectMappings() throws IOException {
|
public Map<Pattern, String> getSeriesDirectMappings() throws IOException {
|
||||||
Map<Pattern, String> mappings = new LinkedHashMap<Pattern, String>();
|
Map<Pattern, String> mappings = new LinkedHashMap<Pattern, String>();
|
||||||
for (String line : seriesDirectMappingsResource.get()) {
|
for (String line : seriesDirectMappingsResource.get()) {
|
||||||
|
@ -268,8 +274,9 @@ public class ReleaseInfo {
|
||||||
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
||||||
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist"));
|
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist"));
|
||||||
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
||||||
protected final CachedResource<String[]> seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list"));
|
protected final CachedResource<String[]> seriesListResource = new SeriesListResource(getBundle(getClass().getName()).getString("url.series-list"));
|
||||||
protected final CachedResource<String[]> seriesDirectMappingsResource = new PatternResource(getBundle(getClass().getName()).getString("url.series-mappings"));
|
protected final CachedResource<String[]> seriesDirectMappingsResource = new PatternResource(getBundle(getClass().getName()).getString("url.series-mappings"));
|
||||||
|
protected final CachedResource<TheTVDBSearchResult[]> theTVDBIndexResource = new TheTVDBIndexResource(getBundle(getClass().getName()).getString("url.thetvdb-index"));
|
||||||
|
|
||||||
|
|
||||||
protected static class PatternResource extends CachedResource<String[]> {
|
protected static class PatternResource extends CachedResource<String[]> {
|
||||||
|
@ -310,16 +317,39 @@ public class ReleaseInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected static class SeriesResource extends CachedResource<String[]> {
|
protected static class SeriesListResource extends CachedResource<String[]> {
|
||||||
|
|
||||||
public SeriesResource(String resource) {
|
public SeriesListResource(String resource) {
|
||||||
super(resource, String[].class, 7 * 24 * 60 * 60 * 1000); // check for updates once a week
|
super(resource, String[].class, 7 * 24 * 60 * 60 * 1000); // check for updates once a week
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String[] process(ByteBuffer data) throws IOException {
|
public String[] process(ByteBuffer data) throws IOException {
|
||||||
return readAll(new InputStreamReader(new GZIPInputStream(new ByteBufferInputStream(data)), "utf-8")).split("\\n");
|
return readAll(new InputStreamReader(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8")).split("\\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected static class TheTVDBIndexResource extends CachedResource<TheTVDBSearchResult[]> {
|
||||||
|
|
||||||
|
public TheTVDBIndexResource(String resource) {
|
||||||
|
super(resource, TheTVDBSearchResult[].class, 7 * 24 * 60 * 60 * 1000); // check for updates once a week
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TheTVDBSearchResult[] process(ByteBuffer data) throws IOException {
|
||||||
|
Scanner scanner = new Scanner(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n");
|
||||||
|
|
||||||
|
List<TheTVDBSearchResult> tvshows = new ArrayList<TheTVDBSearchResult>();
|
||||||
|
while (scanner.hasNext()) {
|
||||||
|
int id = scanner.nextInt();
|
||||||
|
String name = scanner.next().trim();
|
||||||
|
tvshows.add(new TheTVDBSearchResult(name, id));
|
||||||
|
}
|
||||||
|
|
||||||
|
return tvshows.toArray(new TheTVDBSearchResult[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,5 +22,8 @@ url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz
|
||||||
# list of tv show and anime names
|
# list of tv show and anime names
|
||||||
url.series-list: http://filebot.sourceforge.net/data/series.list.gz
|
url.series-list: http://filebot.sourceforge.net/data/series.list.gz
|
||||||
|
|
||||||
|
# TheTVDB index
|
||||||
|
url.thetvdb-index: http://filebot.sourceforge.net/data/thetvdb.txt.gz
|
||||||
|
|
||||||
# disk folder matcher
|
# disk folder matcher
|
||||||
pattern.diskfolder.entry: BDMV|HVDVD_TS|VIDEO_TS|AUDIO_TS|VCD|movie.nfo
|
pattern.diskfolder.entry: BDMV|HVDVD_TS|VIDEO_TS|AUDIO_TS|VCD|movie.nfo
|
||||||
|
|
|
@ -24,7 +24,7 @@ import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
||||||
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
||||||
|
|
||||||
|
|
||||||
class LocalSearch<T> {
|
public class LocalSearch<T> {
|
||||||
|
|
||||||
private final AbstractStringMetric metric = new QGramsDistance();
|
private final AbstractStringMetric metric = new QGramsDistance();
|
||||||
private final float resultMinimumSimilarity = 0.5f;
|
private final float resultMinimumSimilarity = 0.5f;
|
||||||
|
@ -75,9 +75,13 @@ class LocalSearch<T> {
|
||||||
if (entry.get() != null) {
|
if (entry.get() != null) {
|
||||||
resultSet.add(entry.get());
|
resultSet.add(entry.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Thread.interrupted()) {
|
||||||
|
throw new InterruptedException();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
executor.shutdown();
|
executor.shutdownNow();
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort by similarity descending (best matches first)
|
// sort by similarity descending (best matches first)
|
||||||
|
|
|
@ -2,3 +2,4 @@ options +indexes
|
||||||
|
|
||||||
redirect 301 /data/movies.txt.gz http://sourceforge.net/projects/filebot/files/data/movies.txt.gz/download
|
redirect 301 /data/movies.txt.gz http://sourceforge.net/projects/filebot/files/data/movies.txt.gz/download
|
||||||
redirect 301 /data/series.list.gz http://sourceforge.net/projects/filebot/files/data/series.list.gz/download
|
redirect 301 /data/series.list.gz http://sourceforge.net/projects/filebot/files/data/series.list.gz/download
|
||||||
|
redirect 301 /data/thetvdb.txt.gz http://sourceforge.net/projects/filebot/files/data/thetvdb.txt.gz/download
|
||||||
|
|
|
@ -1,5 +1 @@
|
||||||
Dark.Matters.Twisted.But.True Dark Matters
|
HIMYM How I Met your Mother
|
||||||
Franklin.and.Bash Franklin & Bash
|
|
||||||
HIMYM How I Met your Mother
|
|
||||||
Rizolli.and.Isles Rizzoli & Isles
|
|
||||||
Rizzoli.and.Isles Rizzoli & Isles
|
|
Loading…
Reference in New Issue