+ TheTVDB: extend API search with LocalSearch from cached database index
This commit is contained in:
parent
ac958bd7d3
commit
caafbca373
|
@ -44,8 +44,9 @@ sortRegexList("website/data/series-mappings.txt")
|
|||
// ------------------------------------------------------------------------- //
|
||||
|
||||
|
||||
def series_out = new File("website/data/series.list.gz")
|
||||
def movies_out = new File("website/data/movies.txt.gz")
|
||||
def series_out = new File("website/data/series.list.gz")
|
||||
def movies_out = new File("website/data/movies.txt.gz")
|
||||
def thetvdb_out = new File("website/data/thetvdb.txt.gz")
|
||||
|
||||
def gz(file, lines) {
|
||||
file.withOutputStream{ out ->
|
||||
|
@ -114,15 +115,23 @@ println "Movie Count: " + movies.size()
|
|||
|
||||
// ------------------------------------------------------------------------- //
|
||||
|
||||
// BUILD thetvdb.txt.gz
|
||||
def thetvdb_index_url = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')
|
||||
def thetvdb_index = thetvdb_index_url.fetch().getHtml('UTF-8')
|
||||
.depthFirst().TABLE.find{it['@id'] == "listtable"}
|
||||
.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English' && it.TD[0].A.text() }
|
||||
.findResults{ [it.TD[2].text(), it.TD[0].A.text()] }
|
||||
|
||||
// join and sort
|
||||
def thetvdb = thetvdb_index.findResults{ [it[0].pad(6), it[1]].join('\t') }.sort()
|
||||
gz(thetvdb_out, thetvdb)
|
||||
println "TheTVDB Index: " + thetvdb.size()
|
||||
|
||||
|
||||
// BUILD series.list.gz
|
||||
|
||||
// TheTVDB
|
||||
def thetvdb_index = new URL('http://thetvdb.com/?string=&searchseriesid=&tab=listseries&function=Search')
|
||||
def thetvdb_names = thetvdb_index.fetch().getHtml('UTF-8')
|
||||
.depthFirst().TABLE.find{it['@id'] == "listtable"}
|
||||
.depthFirst().TR.findAll{ it.TD.size() == 3 && it.TD[1].text() == 'English'}
|
||||
.findResults{ it.TD[0].A.text() }
|
||||
def thetvdb_names = thetvdb_index.findResults{ it[1] }
|
||||
|
||||
// AniDB
|
||||
def anidb_names = net.sourceforge.filebot.WebServices.AniDB.getAnimeTitles().findResults{ [it.getPrimaryTitle(), it.getOfficialTitle('en')] }.flatten()
|
||||
|
@ -141,7 +150,7 @@ dokuwiki_index.getText('UTF-8').eachLine{
|
|||
|
||||
def names = [thetvdb_names, anidb_names]
|
||||
names.each{ if (it.size() == 0) throw new Exception("Failed to scrape series names") } // sanity check
|
||||
names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it) } // collect and normalize names
|
||||
names = names.flatten().findAll{ it =~ /^[A-Z0-9]/ && it =~ /[\p{Alpha}]{3}/}.findResults{ net.sourceforge.filebot.similarity.Normalization.normalizePunctuation(it).toLowerCase() } // collect and normalize names
|
||||
|
||||
def seriesSorter = new TreeSet(String.CASE_INSENSITIVE_ORDER)
|
||||
seriesSorter.addAll(names)
|
||||
|
|
|
@ -2,14 +2,33 @@
|
|||
package net.sourceforge.filebot;
|
||||
|
||||
|
||||
import static java.util.Arrays.*;
|
||||
import static java.util.Collections.*;
|
||||
import static net.sourceforge.filebot.Settings.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
import net.sourceforge.filebot.media.MediaDetection;
|
||||
import net.sourceforge.filebot.web.AnidbClient;
|
||||
import net.sourceforge.filebot.web.EpisodeListProvider;
|
||||
import net.sourceforge.filebot.web.FanartTV;
|
||||
import net.sourceforge.filebot.web.IMDbClient;
|
||||
import net.sourceforge.filebot.web.LocalSearch;
|
||||
import net.sourceforge.filebot.web.MovieIdentificationService;
|
||||
import net.sourceforge.filebot.web.OpenSubtitlesClient;
|
||||
import net.sourceforge.filebot.web.SearchResult;
|
||||
import net.sourceforge.filebot.web.SerienjunkiesClient;
|
||||
import net.sourceforge.filebot.web.SublightSubtitleClient;
|
||||
import net.sourceforge.filebot.web.SubsceneSubtitleClient;
|
||||
|
@ -28,9 +47,11 @@ public final class WebServices {
|
|||
// episode dbs
|
||||
public static final TVRageClient TVRage = new TVRageClient();
|
||||
public static final AnidbClient AniDB = new AnidbClient(getApplicationName().toLowerCase(), 3);
|
||||
public static final TheTVDBClient TheTVDB = new TheTVDBClient(getApplicationProperty("thetvdb.apikey"));
|
||||
public static final SerienjunkiesClient Serienjunkies = new SerienjunkiesClient(getApplicationProperty("serienjunkies.apikey"));
|
||||
|
||||
// extended TheTVDB module with local search
|
||||
public static final TheTVDBClient TheTVDB = new TheTVDBClientWithLocalSearch(getApplicationProperty("thetvdb.apikey"));
|
||||
|
||||
// movie dbs
|
||||
public static final IMDbClient IMDb = new IMDbClient();
|
||||
public static final TMDbClient TMDb = new TMDbClient(getApplicationProperty("themoviedb.apikey"));
|
||||
|
@ -84,6 +105,81 @@ public final class WebServices {
|
|||
}
|
||||
|
||||
|
||||
private static class TheTVDBClientWithLocalSearch extends TheTVDBClient {
|
||||
|
||||
public TheTVDBClientWithLocalSearch(String apikey) {
|
||||
super(apikey);
|
||||
}
|
||||
|
||||
// index of local thetvdb data dump
|
||||
private static LocalSearch<SearchResult> localIndex;
|
||||
|
||||
|
||||
private synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException {
|
||||
if (localIndex == null) {
|
||||
// fetch data dump
|
||||
TheTVDBSearchResult[] data = MediaDetection.releaseInfo.getTheTVDBIndex();
|
||||
|
||||
// index data dump
|
||||
localIndex = new LocalSearch<SearchResult>(asList(data)) {
|
||||
|
||||
@Override
|
||||
protected Set<String> getFields(SearchResult object) {
|
||||
return set(object.getName());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return localIndex;
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public List<SearchResult> fetchSearchResult(final String query, final Locale locale) throws Exception {
|
||||
Callable<List<SearchResult>> apiSearch = new Callable<List<SearchResult>>() {
|
||||
|
||||
@Override
|
||||
public List<SearchResult> call() throws Exception {
|
||||
return TheTVDBClientWithLocalSearch.super.fetchSearchResult(query, locale);
|
||||
}
|
||||
};
|
||||
Callable<List<SearchResult>> localSearch = new Callable<List<SearchResult>>() {
|
||||
|
||||
@Override
|
||||
public List<SearchResult> call() throws Exception {
|
||||
try {
|
||||
return getLocalIndex().search(query);
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(TheTVDBClientWithLocalSearch.class.getName()).log(Level.SEVERE, e.getMessage(), e);
|
||||
}
|
||||
|
||||
// let local search fail gracefully without affecting API search
|
||||
return emptyList();
|
||||
}
|
||||
};
|
||||
|
||||
ExecutorService executor = Executors.newFixedThreadPool(2);
|
||||
try {
|
||||
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
|
||||
|
||||
for (Future<List<SearchResult>> resultSet : executor.invokeAll(asList(localSearch, apiSearch))) {
|
||||
try {
|
||||
results.addAll(resultSet.get());
|
||||
} catch (ExecutionException e) {
|
||||
if (e.getCause() instanceof Exception) {
|
||||
throw (Exception) e.getCause(); // unwrap cause
|
||||
}
|
||||
}
|
||||
}
|
||||
return new ArrayList<SearchResult>(results);
|
||||
} finally {
|
||||
executor.shutdownNow();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Dummy constructor to prevent instantiation.
|
||||
*/
|
||||
|
|
|
@ -37,6 +37,7 @@ import java.util.zip.GZIPInputStream;
|
|||
|
||||
import net.sourceforge.filebot.web.CachedResource;
|
||||
import net.sourceforge.filebot.web.Movie;
|
||||
import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
|
||||
import net.sourceforge.tuned.ByteBufferInputStream;
|
||||
|
||||
|
||||
|
@ -242,6 +243,11 @@ public class ReleaseInfo {
|
|||
}
|
||||
|
||||
|
||||
public TheTVDBSearchResult[] getTheTVDBIndex() throws IOException {
|
||||
return theTVDBIndexResource.get();
|
||||
}
|
||||
|
||||
|
||||
public Map<Pattern, String> getSeriesDirectMappings() throws IOException {
|
||||
Map<Pattern, String> mappings = new LinkedHashMap<Pattern, String>();
|
||||
for (String line : seriesDirectMappingsResource.get()) {
|
||||
|
@ -268,8 +274,9 @@ public class ReleaseInfo {
|
|||
protected final CachedResource<String[]> queryBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.query-blacklist"));
|
||||
protected final CachedResource<String[]> excludeBlacklistResource = new PatternResource(getBundle(getClass().getName()).getString("url.exclude-blacklist"));
|
||||
protected final CachedResource<Movie[]> movieListResource = new MovieResource(getBundle(getClass().getName()).getString("url.movie-list"));
|
||||
protected final CachedResource<String[]> seriesListResource = new SeriesResource(getBundle(getClass().getName()).getString("url.series-list"));
|
||||
protected final CachedResource<String[]> seriesListResource = new SeriesListResource(getBundle(getClass().getName()).getString("url.series-list"));
|
||||
protected final CachedResource<String[]> seriesDirectMappingsResource = new PatternResource(getBundle(getClass().getName()).getString("url.series-mappings"));
|
||||
protected final CachedResource<TheTVDBSearchResult[]> theTVDBIndexResource = new TheTVDBIndexResource(getBundle(getClass().getName()).getString("url.thetvdb-index"));
|
||||
|
||||
|
||||
protected static class PatternResource extends CachedResource<String[]> {
|
||||
|
@ -310,16 +317,39 @@ public class ReleaseInfo {
|
|||
}
|
||||
|
||||
|
||||
protected static class SeriesResource extends CachedResource<String[]> {
|
||||
protected static class SeriesListResource extends CachedResource<String[]> {
|
||||
|
||||
public SeriesResource(String resource) {
|
||||
public SeriesListResource(String resource) {
|
||||
super(resource, String[].class, 7 * 24 * 60 * 60 * 1000); // check for updates once a week
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String[] process(ByteBuffer data) throws IOException {
|
||||
return readAll(new InputStreamReader(new GZIPInputStream(new ByteBufferInputStream(data)), "utf-8")).split("\\n");
|
||||
return readAll(new InputStreamReader(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8")).split("\\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected static class TheTVDBIndexResource extends CachedResource<TheTVDBSearchResult[]> {
|
||||
|
||||
public TheTVDBIndexResource(String resource) {
|
||||
super(resource, TheTVDBSearchResult[].class, 7 * 24 * 60 * 60 * 1000); // check for updates once a week
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TheTVDBSearchResult[] process(ByteBuffer data) throws IOException {
|
||||
Scanner scanner = new Scanner(new GZIPInputStream(new ByteBufferInputStream(data)), "UTF-8").useDelimiter("\t|\n");
|
||||
|
||||
List<TheTVDBSearchResult> tvshows = new ArrayList<TheTVDBSearchResult>();
|
||||
while (scanner.hasNext()) {
|
||||
int id = scanner.nextInt();
|
||||
String name = scanner.next().trim();
|
||||
tvshows.add(new TheTVDBSearchResult(name, id));
|
||||
}
|
||||
|
||||
return tvshows.toArray(new TheTVDBSearchResult[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,5 +22,8 @@ url.movie-list: http://filebot.sourceforge.net/data/movies.txt.gz
|
|||
# list of tv show and anime names
|
||||
url.series-list: http://filebot.sourceforge.net/data/series.list.gz
|
||||
|
||||
# TheTVDB index
|
||||
url.thetvdb-index: http://filebot.sourceforge.net/data/thetvdb.txt.gz
|
||||
|
||||
# disk folder matcher
|
||||
pattern.diskfolder.entry: BDMV|HVDVD_TS|VIDEO_TS|AUDIO_TS|VCD|movie.nfo
|
||||
|
|
|
@ -24,7 +24,7 @@ import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
|
|||
import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
|
||||
|
||||
|
||||
class LocalSearch<T> {
|
||||
public class LocalSearch<T> {
|
||||
|
||||
private final AbstractStringMetric metric = new QGramsDistance();
|
||||
private final float resultMinimumSimilarity = 0.5f;
|
||||
|
@ -75,9 +75,13 @@ class LocalSearch<T> {
|
|||
if (entry.get() != null) {
|
||||
resultSet.add(entry.get());
|
||||
}
|
||||
|
||||
if (Thread.interrupted()) {
|
||||
throw new InterruptedException();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
executor.shutdown();
|
||||
executor.shutdownNow();
|
||||
}
|
||||
|
||||
// sort by similarity descending (best matches first)
|
||||
|
|
|
@ -2,3 +2,4 @@ options +indexes
|
|||
|
||||
redirect 301 /data/movies.txt.gz http://sourceforge.net/projects/filebot/files/data/movies.txt.gz/download
|
||||
redirect 301 /data/series.list.gz http://sourceforge.net/projects/filebot/files/data/series.list.gz/download
|
||||
redirect 301 /data/thetvdb.txt.gz http://sourceforge.net/projects/filebot/files/data/thetvdb.txt.gz/download
|
||||
|
|
|
@ -1,5 +1 @@
|
|||
Dark.Matters.Twisted.But.True Dark Matters
|
||||
Franklin.and.Bash Franklin & Bash
|
||||
HIMYM How I Met your Mother
|
||||
Rizolli.and.Isles Rizzoli & Isles
|
||||
Rizzoli.and.Isles Rizzoli & Isles
|
Loading…
Reference in New Issue