+ use OpenSubtitles dump for OpenSubtitles local search
This commit is contained in:
parent
d95e4a985e
commit
0e978412df
|
@ -44,6 +44,8 @@ println "Reviews: " + reviews.size()
|
|||
def moviedb_out = new File("website/data/moviedb.txt")
|
||||
def thetvdb_out = new File("website/data/thetvdb.txt")
|
||||
def anidb_out = new File("website/data/anidb.txt")
|
||||
def osdb_out = new File("website/data/osdb.txt")
|
||||
|
||||
|
||||
def pack(file, lines) {
|
||||
new File(file.parentFile, file.name + '.xz').withOutputStream{ out ->
|
||||
|
@ -105,6 +107,31 @@ def csv(f, delim, keyIndex, valueIndex) {
|
|||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
// BUILD osdb index
def osdb = []

// read the OpenSubtitles dump line by line; each line is one tab-separated record
new File('osdb.txt').eachLine('UTF-8'){
	def fields = it.split(/\t/)*.trim()

	// 0 IDMovie, 1 IDMovieImdb, 2 MovieName, 3 MovieYear, 4 MovieKind, 5 MoviePriority
	if (fields.size() == 6 && fields[1] ==~ /\d+/ && fields[3] ==~ /\d{4}/) {
		if (fields[4] ==~ /movie|tv.series/ && isValidMovieName(fields[2]) && (fields[3] as int) >= 1970 && (fields[5] as int) >= 100) {
			// map MovieKind to a single-character code: 'm' for movies, 's' for series
			// (the series test uses the same pattern as the filter above, so accepted rows never end up as '?')
			def kind = fields[4] == /movie/ ? 'm' : fields[4] ==~ /tv.series/ ? 's' : '?'
			osdb << [fields[1] as int, fields[2], fields[3] as int, kind, fields[5] as int]
		}
	}
}

// 0 imdbid, 1 name, 2 year, 3 kind, 4 priority
osdb = osdb.sort{ it[4] }

// sanity check
if (osdb.size() < 30000) { die('OSDB index sanity failed:' + osdb.size()) }
pack(osdb_out, osdb*.join('\t'))
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
|
||||
// BUILD moviedb index
|
||||
def omdb = []
|
||||
new File('omdb.txt').eachLine('Windows-1252'){
|
||||
|
|
|
@ -7,8 +7,6 @@ import static net.filebot.media.MediaDetection.*;
|
|||
import static net.filebot.util.FileUtilities.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
@ -30,19 +28,17 @@ import net.filebot.web.EpisodeListProvider;
|
|||
import net.filebot.web.FanartTVClient;
|
||||
import net.filebot.web.ID3Lookup;
|
||||
import net.filebot.web.LocalSearch;
|
||||
import net.filebot.web.Movie;
|
||||
import net.filebot.web.MovieIdentificationService;
|
||||
import net.filebot.web.MusicIdentificationService;
|
||||
import net.filebot.web.OMDbClient;
|
||||
import net.filebot.web.OpenSubtitlesClient;
|
||||
import net.filebot.web.OpenSubtitlesSearchResult;
|
||||
import net.filebot.web.SearchResult;
|
||||
import net.filebot.web.SubtitleDescriptor;
|
||||
import net.filebot.web.SubtitleProvider;
|
||||
import net.filebot.web.TMDbClient;
|
||||
import net.filebot.web.TVRageClient;
|
||||
import net.filebot.web.TheTVDBClient;
|
||||
import net.filebot.web.TheTVDBSearchResult;
|
||||
import net.filebot.web.TheTVDBSeriesInfo;
|
||||
import net.filebot.web.VideoHashSubtitleService;
|
||||
|
||||
/**
|
||||
|
@ -62,7 +58,7 @@ public final class WebServices {
|
|||
public static final TMDbClient TheMovieDB = new TMDbClient(getApiKey("themoviedb"));
|
||||
|
||||
// subtitle dbs
|
||||
public static final OpenSubtitlesClient OpenSubtitles = new OpenSubtitlesClientWithLocalSearch(getApiKey("opensubtitles"), getApplicationVersion(), TheTVDB, TheMovieDB);
|
||||
public static final OpenSubtitlesClient OpenSubtitles = new OpenSubtitlesClientWithLocalSearch(getApiKey("opensubtitles"), getApplicationVersion());
|
||||
|
||||
// misc
|
||||
public static final FanartTVClient FanartTV = new FanartTVClient(Settings.getApiKey("fanart.tv"));
|
||||
|
@ -178,81 +174,38 @@ public final class WebServices {
|
|||
|
||||
public static class OpenSubtitlesClientWithLocalSearch extends OpenSubtitlesClient {
|
||||
|
||||
private final EpisodeListProvider seriesIndex;
|
||||
private final MovieIdentificationService movieIndex;
|
||||
|
||||
public OpenSubtitlesClientWithLocalSearch(String name, String version, EpisodeListProvider seriesIndex, MovieIdentificationService movieIndex) {
|
||||
public OpenSubtitlesClientWithLocalSearch(String name, String version) {
|
||||
super(name, version);
|
||||
this.seriesIndex = seriesIndex;
|
||||
this.movieIndex = movieIndex;
|
||||
}
|
||||
|
||||
// index of local OpenSubtitles data dump
|
||||
private static LocalSearch<SearchResult> localIndex;
|
||||
|
||||
public synchronized LocalSearch<SearchResult> getLocalIndex() throws IOException {
|
||||
if (localIndex == null) {
|
||||
// fetch data dump
|
||||
OpenSubtitlesSearchResult[] data = releaseInfo.getOpenSubtitlesIndex();
|
||||
|
||||
// index data dump
|
||||
localIndex = new LocalSearch<SearchResult>(asList(data)) {
|
||||
|
||||
@Override
|
||||
protected Set<String> getFields(SearchResult object) {
|
||||
return set(object.getEffectiveNames());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return localIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized List<SearchResult> search(final String query, final boolean byMovie, final boolean bySeries) throws Exception {
|
||||
List<Callable<List<? extends SearchResult>>> queries = new ArrayList<>(2);
|
||||
if (byMovie) {
|
||||
queries.add(() -> movieIndex.searchMovie(query, Locale.ENGLISH));
|
||||
}
|
||||
if (bySeries) {
|
||||
queries.add(() -> seriesIndex.search(query, Locale.ENGLISH));
|
||||
}
|
||||
List<SearchResult> results = getLocalIndex().search(query);
|
||||
|
||||
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
|
||||
for (Future<List<? extends SearchResult>> resultSet : requestThreadPool.invokeAll(queries)) {
|
||||
try {
|
||||
results.addAll(resultSet.get());
|
||||
} catch (ExecutionException e) {
|
||||
if (e.getCause() instanceof Exception) {
|
||||
throw (Exception) e.getCause(); // unwrap cause
|
||||
}
|
||||
}
|
||||
}
|
||||
return sortBySimilarity(results, singleton(query), new MetricAvg(getSeriesMatchMetric(), getMovieMatchMetric()), false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized List<SubtitleDescriptor> getSubtitleList(SearchResult searchResult, String languageName) throws Exception {
|
||||
Movie id = getIMDbID(searchResult);
|
||||
if (id != null) {
|
||||
return super.getSubtitleList(getIMDbID(searchResult), languageName);
|
||||
}
|
||||
return emptyList();
|
||||
}
|
||||
|
||||
@Override
|
||||
public URI getSubtitleListLink(SearchResult searchResult, String languageName) {
|
||||
try {
|
||||
Movie id = getIMDbID(searchResult);
|
||||
if (id != null) {
|
||||
return super.getSubtitleListLink(id, languageName);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(WebServices.class.getName()).log(Level.WARNING, e.getMessage());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Movie getIMDbID(SearchResult result) throws Exception {
|
||||
if (result instanceof TheTVDBSearchResult) {
|
||||
TheTVDBSearchResult searchResult = (TheTVDBSearchResult) result;
|
||||
TheTVDBSeriesInfo seriesInfo = (TheTVDBSeriesInfo) ((TheTVDBClient) seriesIndex).getSeriesInfo(searchResult, Locale.ENGLISH);
|
||||
if (seriesInfo.getImdbId() != null) {
|
||||
int imdbId = grepImdbId(seriesInfo.getImdbId()).iterator().next();
|
||||
return new Movie(seriesInfo.getName(), seriesInfo.getStartDate().getYear(), imdbId, -1);
|
||||
}
|
||||
}
|
||||
if (result instanceof Movie) {
|
||||
Movie m = (Movie) result;
|
||||
if (m.getImdbId() > 0)
|
||||
return m;
|
||||
|
||||
// fetch extended movie info
|
||||
m = movieIndex.getMovieDescriptor(m, Locale.ENGLISH);
|
||||
if (m.getImdbId() > 0)
|
||||
return m;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -36,6 +36,7 @@ import net.filebot.util.FileUtilities.RegexFileFilter;
|
|||
import net.filebot.web.AnidbSearchResult;
|
||||
import net.filebot.web.CachedResource;
|
||||
import net.filebot.web.Movie;
|
||||
import net.filebot.web.OpenSubtitlesSearchResult;
|
||||
import net.filebot.web.TheTVDBSearchResult;
|
||||
|
||||
import org.tukaani.xz.XZInputStream;
|
||||
|
@ -305,6 +306,10 @@ public class ReleaseInfo {
|
|||
return anidbIndexResource.get();
|
||||
}
|
||||
|
||||
public OpenSubtitlesSearchResult[] getOpenSubtitlesIndex() throws IOException {
|
||||
return osdbIndexResource.get();
|
||||
}
|
||||
|
||||
private Map<Pattern, String> seriesDirectMappings;
|
||||
|
||||
public Map<Pattern, String> getSeriesDirectMappings() throws IOException {
|
||||
|
@ -349,6 +354,7 @@ public class ReleaseInfo {
|
|||
protected final CachedResource<String[]> seriesDirectMappingsResource = new PatternResource(getProperty("url.series-mappings"));
|
||||
protected final CachedResource<TheTVDBSearchResult[]> tvdbIndexResource = new TheTVDBIndexResource(getProperty("url.thetvdb-index"));
|
||||
protected final CachedResource<AnidbSearchResult[]> anidbIndexResource = new AnidbIndexResource(getProperty("url.anidb-index"));
|
||||
protected final CachedResource<OpenSubtitlesSearchResult[]> osdbIndexResource = new OpenSubtitlesIndexResource(getProperty("url.osdb-index"));
|
||||
|
||||
protected String getProperty(String propertyName) {
|
||||
// allow override via Java System properties
|
||||
|
@ -416,7 +422,7 @@ public class ReleaseInfo {
|
|||
protected static class AnidbIndexResource extends CachedResource<AnidbSearchResult[]> {
|
||||
|
||||
public AnidbIndexResource(String resource) {
|
||||
super(resource, AnidbSearchResult[].class, ONE_WEEK); // check for updates every month
|
||||
super(resource, AnidbSearchResult[].class, ONE_WEEK); // check for updates every week
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -435,6 +441,30 @@ public class ReleaseInfo {
|
|||
}
|
||||
}
|
||||
|
||||
protected static class OpenSubtitlesIndexResource extends CachedResource<OpenSubtitlesSearchResult[]> {
|
||||
|
||||
public OpenSubtitlesIndexResource(String resource) {
|
||||
super(resource, OpenSubtitlesSearchResult[].class, ONE_MONTH); // check for updates every month
|
||||
}
|
||||
|
||||
@Override
|
||||
public OpenSubtitlesSearchResult[] process(ByteBuffer data) throws IOException {
|
||||
List<String[]> rows = readCSV(new XZInputStream(new ByteBufferInputStream(data)), "UTF-8", "\t");
|
||||
List<OpenSubtitlesSearchResult> result = new ArrayList<OpenSubtitlesSearchResult>(rows.size());
|
||||
|
||||
for (String[] row : rows) {
|
||||
int imdbid = parseInt(row[0]);
|
||||
String name = row[1];
|
||||
int year = parseInt(row[2]);
|
||||
char kind = row[3].charAt(0);
|
||||
int score = parseInt(row[4]);
|
||||
result.add(new OpenSubtitlesSearchResult(imdbid, name, year, kind, score));
|
||||
}
|
||||
|
||||
return result.toArray(new OpenSubtitlesSearchResult[0]);
|
||||
}
|
||||
}
|
||||
|
||||
protected static class FolderEntryFilter implements FileFilter {
|
||||
|
||||
private final Pattern entryPattern;
|
||||
|
|
|
@ -34,6 +34,9 @@ url.thetvdb-index: http://app.filebot.net/data/thetvdb.txt.xz
|
|||
# AniDB index
|
||||
url.anidb-index: http://app.filebot.net/data/anidb.txt.xz
|
||||
|
||||
# OpenSubtitles index
|
||||
url.osdb-index: http://app.filebot.net/data/osdb.txt.xz
|
||||
|
||||
# disk folder matcher
|
||||
pattern.diskfolder.entry: BDMV|HVDVD_TS|VIDEO_TS|AUDIO_TS|VCD|MovieObject.bdmv|VIDEO_TS.VOB
|
||||
|
||||
|
|
|
@ -19,8 +19,6 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
|
@ -48,7 +46,6 @@ import net.filebot.ui.SelectDialog;
|
|||
import net.filebot.util.ui.LabelProvider;
|
||||
import net.filebot.util.ui.LinkButton;
|
||||
import net.filebot.util.ui.SimpleLabelProvider;
|
||||
import net.filebot.web.Movie;
|
||||
import net.filebot.web.OpenSubtitlesClient;
|
||||
import net.filebot.web.SearchResult;
|
||||
import net.filebot.web.SubtitleDescriptor;
|
||||
|
@ -152,12 +149,9 @@ public class SubtitlePanel extends AbstractSearchPanel<SubtitleProvider, Subtitl
|
|||
};
|
||||
|
||||
protected Collection<String> getHistory(SubtitleProvider engine) throws Exception {
|
||||
final Set<String> names = new TreeSet<String>();
|
||||
for (Movie it : MediaDetection.releaseInfo.getMovieList()) {
|
||||
names.add(it.getName());
|
||||
}
|
||||
for (SearchResult it : MediaDetection.releaseInfo.getTheTVDBIndex()) {
|
||||
names.add(it.getName());
|
||||
List<String> names = new ArrayList<String>();
|
||||
for (SearchResult it : MediaDetection.releaseInfo.getOpenSubtitlesIndex()) {
|
||||
names.addAll(it.getEffectiveNames());
|
||||
}
|
||||
return names;
|
||||
};
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
package net.filebot.web;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
public class OpenSubtitlesSearchResult extends Movie {
|
||||
|
||||
public static final char KIND_MOVIE = 'm';
|
||||
public static final char KIND_SERIES = 's';
|
||||
|
||||
private char kind;
|
||||
private int score;
|
||||
|
||||
public OpenSubtitlesSearchResult(int imdbId, String name, int year, char kind, int score) {
|
||||
super(name, null, year, imdbId, -1, Locale.ENGLISH);
|
||||
|
||||
this.kind = kind;
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
public char getKind() {
|
||||
return kind;
|
||||
}
|
||||
|
||||
public int getScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
}
|
|
@ -17,7 +17,7 @@ public abstract class SearchResult implements Serializable {
|
|||
|
||||
public SearchResult(String name, String[] aliasNames) {
|
||||
this.name = name;
|
||||
this.aliasNames = aliasNames.clone();
|
||||
this.aliasNames = (aliasNames == null || aliasNames.length == 0) ? EMPTY_STRING_ARRAY : aliasNames.clone();
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
@ -57,4 +57,6 @@ public abstract class SearchResult implements Serializable {
|
|||
return name;
|
||||
}
|
||||
|
||||
private static final String[] EMPTY_STRING_ARRAY = new String[0];
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue