+ use OpenSubtitles dump for OpenSubtitles local search

This commit is contained in:
Reinhard Pointner 2015-05-11 09:13:35 +00:00
parent d95e4a985e
commit 0e978412df
7 changed files with 120 additions and 83 deletions

View File

@ -44,6 +44,8 @@ println "Reviews: " + reviews.size()
def moviedb_out = new File("website/data/moviedb.txt")
def thetvdb_out = new File("website/data/thetvdb.txt")
def anidb_out = new File("website/data/anidb.txt")
def osdb_out = new File("website/data/osdb.txt")
def pack(file, lines) {
new File(file.parentFile, file.name + '.xz').withOutputStream{ out ->
@ -105,6 +107,31 @@ def csv(f, delim, keyIndex, valueIndex) {
/* ------------------------------------------------------------------------- */
// BUILD osdb index
// Collect usable rows from the raw OpenSubtitles export (osdb.txt, tab-separated, UTF-8)
def osdb = []
new File('osdb.txt').eachLine('UTF-8'){
	def fields = it.split(/\t/)*.trim()
	// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
	if (fields.size() == 6 && fields[1] ==~ /\d+/ && fields[3] ==~ /\d{4}/) {
		// keep only movies / tv series with a valid name, released 1970 or later, with priority >= 100
		if (fields[4] ==~ /movie|tv.series/ && isValidMovieName(fields[2]) && (fields[3] as int) >= 1970 && (fields[5] as int) >= 100) {
			// map MovieKind to a single character: 'm' for movie, 's' for tv series
			// (the series branch must test for the series kind, otherwise every series ends up as '?')
			def kind = fields[4] == /movie/ ? 'm' : fields[4] ==~ /tv.series/ ? 's' : '?'
			osdb << [fields[1] as int, fields[2], fields[3] as int, kind, fields[5] as int]
		}
	}
}
// 0 imdbid, 1 name, 2 year, 3 kind, 4 priority
// order rows by priority (ascending)
osdb = osdb.sort{ it[4] }
// sanity check
if (osdb.size() < 30000) { die('OSDB index sanity failed:' + osdb.size()) }
// write the index as tab-separated lines via pack()
pack(osdb_out, osdb*.join('\t'))
/* ------------------------------------------------------------------------- */
// BUILD moviedb index
def omdb = []
new File('omdb.txt').eachLine('Windows-1252'){

View File

@ -7,8 +7,6 @@ import static net.filebot.media.MediaDetection.*;
import static net.filebot.util.FileUtilities.*;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
@ -30,19 +28,17 @@ import net.filebot.web.EpisodeListProvider;
import net.filebot.web.FanartTVClient;
import net.filebot.web.ID3Lookup;
import net.filebot.web.LocalSearch;
import net.filebot.web.Movie;
import net.filebot.web.MovieIdentificationService;
import net.filebot.web.MusicIdentificationService;
import net.filebot.web.OMDbClient;
import net.filebot.web.OpenSubtitlesClient;
import net.filebot.web.OpenSubtitlesSearchResult;
import net.filebot.web.SearchResult;
import net.filebot.web.SubtitleDescriptor;
import net.filebot.web.SubtitleProvider;
import net.filebot.web.TMDbClient;
import net.filebot.web.TVRageClient;
import net.filebot.web.TheTVDBClient;
import net.filebot.web.TheTVDBSearchResult;
import net.filebot.web.TheTVDBSeriesInfo;
import net.filebot.web.VideoHashSubtitleService;
/**
@ -62,7 +58,7 @@ public final class WebServices {
public static final TMDbClient TheMovieDB = new TMDbClient(getApiKey("themoviedb"));
// subtitle dbs
public static final OpenSubtitlesClient OpenSubtitles = new OpenSubtitlesClientWithLocalSearch(getApiKey("opensubtitles"), getApplicationVersion(), TheTVDB, TheMovieDB);
public static final OpenSubtitlesClient OpenSubtitles = new OpenSubtitlesClientWithLocalSearch(getApiKey("opensubtitles"), getApplicationVersion());
// misc
public static final FanartTVClient FanartTV = new FanartTVClient(Settings.getApiKey("fanart.tv"));
@ -178,81 +174,38 @@ public final class WebServices {
public static class OpenSubtitlesClientWithLocalSearch extends OpenSubtitlesClient {
private final EpisodeListProvider seriesIndex;
private final MovieIdentificationService movieIndex;
public OpenSubtitlesClientWithLocalSearch(String name, String version, EpisodeListProvider seriesIndex, MovieIdentificationService movieIndex) {
public OpenSubtitlesClientWithLocalSearch(String name, String version) {
super(name, version);
this.seriesIndex = seriesIndex;
this.movieIndex = movieIndex;
}
// index of local OpenSubtitles data dump
private static LocalSearch<SearchResult> localIndex;
/**
 * Returns the local search index built from the OpenSubtitles data dump,
 * creating it on first use from {@code releaseInfo.getOpenSubtitlesIndex()}.
 * Entries are indexed by their effective names.
 *
 * NOTE(review): the cached index is a static field while this method locks on the
 * instance; that only serializes callers sharing one client instance - confirm that
 * a single instance is all that is ever created.
 *
 * @return the shared local index
 * @throws IOException if the data dump cannot be fetched
 */
public synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException {
	// already built: reuse the cached index
	if (localIndex != null) {
		return localIndex;
	}

	// fetch the data dump and index every entry by its effective names
	OpenSubtitlesSearchResult[] dump = releaseInfo.getOpenSubtitlesIndex();
	localIndex = new LocalSearch<SearchResult>(asList(dump)) {

		@Override
		protected Set<String> getFields(SearchResult entry) {
			return set(entry.getEffectiveNames());
		}
	};
	return localIndex;
}
@Override
public synchronized List<SearchResult> search(final String query, final boolean byMovie, final boolean bySeries) throws Exception {
List<Callable<List<? extends SearchResult>>> queries = new ArrayList<>(2);
if (byMovie) {
queries.add(() -> movieIndex.searchMovie(query, Locale.ENGLISH));
}
if (bySeries) {
queries.add(() -> seriesIndex.search(query, Locale.ENGLISH));
}
List<SearchResult> results = getLocalIndex().search(query);
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
for (Future<List<? extends SearchResult>> resultSet : requestThreadPool.invokeAll(queries)) {
try {
results.addAll(resultSet.get());
} catch (ExecutionException e) {
if (e.getCause() instanceof Exception) {
throw (Exception) e.getCause(); // unwrap cause
}
}
}
return sortBySimilarity(results, singleton(query), new MetricAvg(getSeriesMatchMetric(), getMovieMatchMetric()), false);
}
@Override
public synchronized List<SubtitleDescriptor> getSubtitleList(SearchResult searchResult, String languageName) throws Exception {
Movie id = getIMDbID(searchResult);
if (id != null) {
return super.getSubtitleList(getIMDbID(searchResult), languageName);
}
return emptyList();
}
@Override
public URI getSubtitleListLink(SearchResult searchResult, String languageName) {
try {
Movie id = getIMDbID(searchResult);
if (id != null) {
return super.getSubtitleListLink(id, languageName);
}
} catch (Exception e) {
Logger.getLogger(WebServices.class.getName()).log(Level.WARNING, e.getMessage());
}
return null;
}
public Movie getIMDbID(SearchResult result) throws Exception {
if (result instanceof TheTVDBSearchResult) {
TheTVDBSearchResult searchResult = (TheTVDBSearchResult) result;
TheTVDBSeriesInfo seriesInfo = (TheTVDBSeriesInfo) ((TheTVDBClient) seriesIndex).getSeriesInfo(searchResult, Locale.ENGLISH);
if (seriesInfo.getImdbId() != null) {
int imdbId = grepImdbId(seriesInfo.getImdbId()).iterator().next();
return new Movie(seriesInfo.getName(), seriesInfo.getStartDate().getYear(), imdbId, -1);
}
}
if (result instanceof Movie) {
Movie m = (Movie) result;
if (m.getImdbId() > 0)
return m;
// fetch extended movie info
m = movieIndex.getMovieDescriptor(m, Locale.ENGLISH);
if (m.getImdbId() > 0)
return m;
}
return null;
}
}
/**

View File

@ -36,6 +36,7 @@ import net.filebot.util.FileUtilities.RegexFileFilter;
import net.filebot.web.AnidbSearchResult;
import net.filebot.web.CachedResource;
import net.filebot.web.Movie;
import net.filebot.web.OpenSubtitlesSearchResult;
import net.filebot.web.TheTVDBSearchResult;
import org.tukaani.xz.XZInputStream;
@ -305,6 +306,10 @@ public class ReleaseInfo {
return anidbIndexResource.get();
}
/**
 * Returns the OpenSubtitles index entries provided by {@code osdbIndexResource}.
 *
 * @return the entries of the OpenSubtitles index
 * @throws IOException if the resource cannot be retrieved
 */
public OpenSubtitlesSearchResult[] getOpenSubtitlesIndex() throws IOException {
	OpenSubtitlesSearchResult[] index = osdbIndexResource.get();
	return index;
}
private Map<Pattern, String> seriesDirectMappings;
public Map<Pattern, String> getSeriesDirectMappings() throws IOException {
@ -349,6 +354,7 @@ public class ReleaseInfo {
protected final CachedResource<String[]> seriesDirectMappingsResource = new PatternResource(getProperty("url.series-mappings"));
protected final CachedResource<TheTVDBSearchResult[]> tvdbIndexResource = new TheTVDBIndexResource(getProperty("url.thetvdb-index"));
protected final CachedResource<AnidbSearchResult[]> anidbIndexResource = new AnidbIndexResource(getProperty("url.anidb-index"));
protected final CachedResource<OpenSubtitlesSearchResult[]> osdbIndexResource = new OpenSubtitlesIndexResource(getProperty("url.osdb-index"));
protected String getProperty(String propertyName) {
// allow override via Java System properties
@ -416,7 +422,7 @@ public class ReleaseInfo {
protected static class AnidbIndexResource extends CachedResource<AnidbSearchResult[]> {
public AnidbIndexResource(String resource) {
super(resource, AnidbSearchResult[].class, ONE_WEEK); // check for updates every month
super(resource, AnidbSearchResult[].class, ONE_WEEK); // check for updates every week
}
@Override
@ -435,6 +441,30 @@ public class ReleaseInfo {
}
}
/**
 * Cached resource that turns the XZ-compressed, tab-separated OpenSubtitles index
 * into an array of {@link OpenSubtitlesSearchResult} entries.
 */
protected static class OpenSubtitlesIndexResource extends CachedResource<OpenSubtitlesSearchResult[]> {

	public OpenSubtitlesIndexResource(String resource) {
		// check for updates every month
		super(resource, OpenSubtitlesSearchResult[].class, ONE_MONTH);
	}

	/**
	 * Decompresses the data and parses one entry per row.
	 * Columns: 0 imdbid, 1 name, 2 year, 3 kind (first character only), 4 score.
	 */
	@Override
	public OpenSubtitlesSearchResult[] process(ByteBuffer data) throws IOException {
		List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");

		OpenSubtitlesSearchResult[] index = new OpenSubtitlesSearchResult[rows.size()];
		int next = 0;
		for (String[] columns : rows) {
			// arguments are evaluated left to right, i.e. in column order
			index[next++] = new OpenSubtitlesSearchResult(parseInt(columns[0]), columns[1], parseInt(columns[2]), columns[3].charAt(0), parseInt(columns[4]));
		}
		return index;
	}
}
protected static class FolderEntryFilter implements FileFilter {
private final Pattern entryPattern;

View File

@ -34,6 +34,9 @@ url.thetvdb-index: http://app.filebot.net/data/thetvdb.txt.xz
# AniDB index
url.anidb-index: http://app.filebot.net/data/anidb.txt.xz
# OpenSubtitles index
url.osdb-index: http://app.filebot.net/data/osdb.txt.xz
# disk folder matcher
pattern.diskfolder.entry: BDMV|HVDVD_TS|VIDEO_TS|AUDIO_TS|VCD|MovieObject.bdmv|VIDEO_TS.VOB

View File

@ -19,8 +19,6 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
@ -48,7 +46,6 @@ import net.filebot.ui.SelectDialog;
import net.filebot.util.ui.LabelProvider;
import net.filebot.util.ui.LinkButton;
import net.filebot.util.ui.SimpleLabelProvider;
import net.filebot.web.Movie;
import net.filebot.web.OpenSubtitlesClient;
import net.filebot.web.SearchResult;
import net.filebot.web.SubtitleDescriptor;
@ -152,12 +149,9 @@ public class SubtitlePanel extends AbstractSearchPanel<SubtitleProvider, Subtitl
};
protected Collection<String> getHistory(SubtitleProvider engine) throws Exception {
final Set<String> names = new TreeSet<String>();
for (Movie it : MediaDetection.releaseInfo.getMovieList()) {
names.add(it.getName());
}
for (SearchResult it : MediaDetection.releaseInfo.getTheTVDBIndex()) {
names.add(it.getName());
List<String> names = new ArrayList<String>();
for (SearchResult it : MediaDetection.releaseInfo.getOpenSubtitlesIndex()) {
names.addAll(it.getEffectiveNames());
}
return names;
};

View File

@ -0,0 +1,28 @@
package net.filebot.web;
import java.util.Locale;
/**
 * A {@link Movie} search result that additionally carries a kind marker
 * (see {@link #KIND_MOVIE} and {@link #KIND_SERIES}) and a score.
 */
public class OpenSubtitlesSearchResult extends Movie {

	/** Kind marker for movies. */
	public static final char KIND_MOVIE = 'm';

	/** Kind marker for series. */
	public static final char KIND_SERIES = 's';

	private char kind;
	private int score;

	/**
	 * @param imdbId IMDb id of the entry
	 * @param name name of the entry
	 * @param year year of the entry
	 * @param kind kind marker character
	 * @param score score of the entry
	 */
	public OpenSubtitlesSearchResult(int imdbId, String name, int year, char kind, int score) {
		// no alias names are passed (null) and English is the fixed locale;
		// -1 is presumably the "unknown" value for the second id - confirm against Movie
		super(name, null, year, imdbId, -1, Locale.ENGLISH);

		this.score = score;
		this.kind = kind;
	}

	/** @return the score given at construction */
	public int getScore() {
		return this.score;
	}

	/** @return the kind marker given at construction */
	public char getKind() {
		return this.kind;
	}
}

View File

@ -17,7 +17,7 @@ public abstract class SearchResult implements Serializable {
/**
 * Creates a search result with a primary name and optional alias names.
 *
 * @param name primary name of the result
 * @param aliasNames alternative names; may be {@code null} or empty, in which case
 *            the shared empty array is used instead of a copy
 */
public SearchResult(String name, String[] aliasNames) {
	this.name = name;
	// single, null-safe assignment: an unconditional aliasNames.clone() before this line
	// would throw a NullPointerException for null input and defeat the guard
	this.aliasNames = (aliasNames == null || aliasNames.length == 0) ? EMPTY_STRING_ARRAY : aliasNames.clone();
}
public String getName() {
@ -57,4 +57,6 @@ public abstract class SearchResult implements Serializable {
return name;
}
private static final String[] EMPTY_STRING_ARRAY = new String[0];
}