From f5b4dbee1974f042d252c01aee7d762dbebc09ad Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Thu, 23 Jan 2014 18:18:25 +0000 Subject: [PATCH] * support and include TheMovieDB alternative_titles data in search and index --- BuildData.groovy | 14 ++-- source/ehcache.xml | 4 +- .../filebot/media/MediaDetection.java | 9 --- .../ui/subtitle/SubtitleUploadDialog.java | 3 +- .../sourceforge/filebot/web/IMDbClient.java | 2 +- .../filebot/web/OpenSubtitlesClient.java | 4 + .../sourceforge/filebot/web/TMDbClient.java | 74 +++++++++++++++---- website/data/query-blacklist.txt | 3 + 8 files changed, 81 insertions(+), 32 deletions(-) diff --git a/BuildData.groovy b/BuildData.groovy index 3c041187..94a51996 100644 --- a/BuildData.groovy +++ b/BuildData.groovy @@ -117,16 +117,20 @@ def tmdb_txt = new File('tmdb.txt') def tmdb_index = csv(tmdb_txt, '\t', 1, [0..-1]) def tmdb = omdb.findResults{ m -> - if (tmdb_index.containsKey(m[0])) { + def sync = System.currentTimeMillis() + if (tmdb_index.containsKey(m[0]) && (sync - tmdb_index[m[0]][0].toLong()) < (360 * 24 * 60 * 60 * 1000L) ) { return tmdb_index[m[0]] } - def sync = System.currentTimeMillis() def row = [sync, m[0].pad(7), 0, m[2], m[1]] try { - def info = net.sourceforge.filebot.WebServices.TMDb.getMovieInfo("tt${m[0]}", Locale.ENGLISH, false) - def names = [info.name, info.originalName, m[1]] - row = [sync, m[0].pad(7), info.id.pad(7), info.released?.year ?: m[2]] + names.findResults{ it ?: '' } + def info = net.sourceforge.filebot.WebServices.TMDb.getMovieInfo("tt${m[0]}", Locale.ENGLISH, true, false) + def names = [info.name, info.originalName] + info.alternativeTitles + if (info.released != null) { + row = [sync, m[0].pad(7), info.id.pad(7), info.released.year] + names + } else { + println "Illegal movie: ${info.name} | ${m}" + } } catch(FileNotFoundException e) { } diff --git a/source/ehcache.xml b/source/ehcache.xml index 0d1e2b29..5598df42 100644 --- a/source/ehcache.xml +++ b/source/ehcache.xml @@ -51,7 +51,7 @@ --> getProbableMatches(String query, Collection options) { // auto-select most probable search result List probableMatches = new LinkedList(); diff --git a/source/net/sourceforge/filebot/ui/subtitle/SubtitleUploadDialog.java b/source/net/sourceforge/filebot/ui/subtitle/SubtitleUploadDialog.java index 9693889e..36da3964 100644 --- a/source/net/sourceforge/filebot/ui/subtitle/SubtitleUploadDialog.java +++ b/source/net/sourceforge/filebot/ui/subtitle/SubtitleUploadDialog.java @@ -52,6 +52,7 @@ import net.miginfocom.swing.MigLayout; import net.sourceforge.filebot.Analytics; import net.sourceforge.filebot.Language; import net.sourceforge.filebot.ResourceManager; +import net.sourceforge.filebot.WebServices; import net.sourceforge.filebot.media.MediaDetection; import net.sourceforge.filebot.ui.LanguageComboBox; import net.sourceforge.filebot.ui.SelectDialog; @@ -668,7 +669,7 @@ public class SubtitleUploadDialog extends JDialog { Collection identity = MediaDetection.detectMovie(mapping.getVideo(), database, database, Locale.ENGLISH, true); for (Movie it : identity) { if (it.getImdbId() <= 0 && it.getTmdbId() > 0) { - it = MediaDetection.tmdb2imdb(it); + it = WebServices.TMDb.getMovieDescriptor(it.getTmdbId(), Locale.ENGLISH, false); } if (it != null && it.getImdbId() > 0) { mapping.setIdentity(it); diff --git a/source/net/sourceforge/filebot/web/IMDbClient.java b/source/net/sourceforge/filebot/web/IMDbClient.java index 76b44bbc..5d308ada 100644 --- a/source/net/sourceforge/filebot/web/IMDbClient.java +++ b/source/net/sourceforge/filebot/web/IMDbClient.java @@ -234,6 +234,6 @@ public class IMDbClient implements MovieIdentificationService { actors.add(new Person(writer, null, "Writer")); } - return new MovieInfo(fields, genres, new ArrayList(0), actors, new ArrayList(0)); + return new MovieInfo(fields, new ArrayList(0), genres, new ArrayList(0), actors, new ArrayList(0)); } } diff --git a/source/net/sourceforge/filebot/web/OpenSubtitlesClient.java b/source/net/sourceforge/filebot/web/OpenSubtitlesClient.java index 6b23d0a7..5557e0d1 100644 --- a/source/net/sourceforge/filebot/web/OpenSubtitlesClient.java +++ b/source/net/sourceforge/filebot/web/OpenSubtitlesClient.java @@ -395,6 +395,10 @@ public class OpenSubtitlesClient implements SubtitleProvider, VideoHashSubtitleS } public Locale detectLanguage(byte[] data) throws Exception { + if (data.length < 256) { + throw new IllegalArgumentException("data is not enough"); + } + // require login login(); diff --git a/source/net/sourceforge/filebot/web/TMDbClient.java b/source/net/sourceforge/filebot/web/TMDbClient.java index fcface6d..be8c01f0 100644 --- a/source/net/sourceforge/filebot/web/TMDbClient.java +++ b/source/net/sourceforge/filebot/web/TMDbClient.java @@ -17,10 +17,12 @@ import java.util.ArrayList; import java.util.Collection; import java.util.EnumMap; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Scanner; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; @@ -65,8 +67,9 @@ public class TMDbClient implements MovieIdentificationService { List result = new ArrayList(); for (JSONObject it : jsonList(response.get("results"))) { - if (it == null) + if (it == null) { continue; + } // e.g. // {"id":16320,"title":"冲出宁静号","release_date":"2005-09-30","original_title":"Serenity"} @@ -85,7 +88,28 @@ public class TMDbClient implements MovieIdentificationService { } catch (Exception e) { throw new IllegalArgumentException("Missing data: release date"); } - result.add(new Movie(title, title.equals(originalTitle) ? new String[] {} : new String[] { originalTitle }, year, -1, id)); + + Set alternativeTitles = new LinkedHashSet(); + if (originalTitle != null) { + alternativeTitles.add(originalTitle); + } + + try { + String countryCode = locale.getCountry().isEmpty() ? "US" : locale.getCountry(); + JSONObject titles = request("movie/" + id + "/alternative_titles", null, null, REQUEST_LIMIT); + for (JSONObject node : jsonList(titles.get("titles"))) { + if (countryCode.equals(node.get("iso_3166_1"))) { + alternativeTitles.add((String) node.get("title")); + } + } + } catch (Exception e) { + Logger.getLogger(TMDbClient.class.getName()).log(Level.WARNING, String.format("Unable to retrieve alternative titles [%s]: %s", title, e.getMessage())); + } + + // make sure main title is not in the set of alternative titles + alternativeTitles.remove(title); + + result.add(new Movie(title, alternativeTitles.toArray(new String[0]), year, -1, id)); } catch (Exception e) { // only print 'missing release date' warnings for matching movie titles if (query.equalsIgnoreCase(title) || query.equalsIgnoreCase(originalTitle)) { @@ -102,9 +126,13 @@ public class TMDbClient implements MovieIdentificationService { @Override public Movie getMovieDescriptor(int imdbid, Locale locale) throws IOException { - String id = String.format("tt%07d", imdbid); + return getMovieDescriptor(imdbid, locale, true); + } + + public Movie getMovieDescriptor(int imdbtmdbid, Locale locale, boolean byIMDB) throws IOException { + String id = byIMDB ? String.format("tt%07d", imdbtmdbid) : String.valueOf(imdbtmdbid); try { - MovieInfo info = getMovieInfo(id, locale, false); + MovieInfo info = getMovieInfo(id, locale, false, false); return new Movie(info.getName(), info.getReleased().getYear(), info.getImdbId(), info.getId()); } catch (FileNotFoundException e) { Logger.getLogger(getClass().getName()).log(Level.WARNING, "Movie not found: " + id); @@ -122,20 +150,20 @@ public class TMDbClient implements MovieIdentificationService { public MovieInfo getMovieInfo(Movie movie, Locale locale) throws IOException { if (movie.getTmdbId() >= 0) { - return getMovieInfo(String.valueOf(movie.getTmdbId()), locale, true); + return getMovieInfo(String.valueOf(movie.getTmdbId()), locale, true, true); } else if (movie.getImdbId() >= 0) { - return getMovieInfo(String.format("tt%07d", movie.getImdbId()), locale, true); + return getMovieInfo(String.format("tt%07d", movie.getImdbId()), locale, true, true); } else { for (Movie result : searchMovie(movie.getName(), locale)) { if (movie.getName().equalsIgnoreCase(result.getName()) && movie.getYear() == result.getYear()) { - return getMovieInfo(String.valueOf(result.getTmdbId()), locale, true); + return getMovieInfo(String.valueOf(result.getTmdbId()), locale, true, true); } } } return null; } - public MovieInfo getMovieInfo(String id, Locale locale, boolean extendedInfo) throws IOException { + public MovieInfo getMovieInfo(String id, Locale locale, boolean includeAlternativeTitles, boolean includeExtendedInfo) throws IOException { JSONObject response = request("movie/" + id, null, locale, REQUEST_LIMIT); Map fields = new EnumMap(MovieProperty.class); @@ -163,17 +191,29 @@ public class TMDbClient implements MovieIdentificationService { spokenLanguages.add((String) it.get("iso_639_1")); } - if (extendedInfo) { + List alternativeTitles = new ArrayList(); + if (includeAlternativeTitles) { + String countryCode = locale.getCountry().isEmpty() ? "US" : locale.getCountry(); + JSONObject titles = request("movie/" + fields.get(MovieProperty.id) + "/alternative_titles", null, null, REQUEST_LIMIT); + for (JSONObject it : jsonList(titles.get("titles"))) { + if (countryCode.equals(it.get("iso_3166_1"))) { + alternativeTitles.add((String) it.get("title")); + } + } + } + + if (includeExtendedInfo) { + String countryCode = locale.getCountry().isEmpty() ? "US" : locale.getCountry(); JSONObject releases = request("movie/" + fields.get(MovieProperty.id) + "/releases", null, null, REQUEST_LIMIT); for (JSONObject it : jsonList(releases.get("countries"))) { - if ("US".equals(it.get("iso_3166_1"))) { + if (countryCode.equals(it.get("iso_3166_1"))) { fields.put(MovieProperty.certification, (String) it.get("certification")); } } } List cast = new ArrayList(); - if (extendedInfo) { + if (includeExtendedInfo) { JSONObject castResponse = request("movie/" + fields.get(MovieProperty.id) + "/casts", null, null, REQUEST_LIMIT); for (String section : new String[] { "cast", "crew" }) { for (JSONObject it : jsonList(castResponse.get(section))) { @@ -190,7 +230,7 @@ public class TMDbClient implements MovieIdentificationService { } List trailers = new ArrayList(); - if (extendedInfo) { + if (includeExtendedInfo) { JSONObject trailerResponse = request("movie/" + fields.get(MovieProperty.id) + "/trailers", null, null, REQUEST_LIMIT); for (String section : new String[] { "quicktime", "youtube" }) { for (JSONObject it : jsonList(trailerResponse.get(section))) { @@ -207,7 +247,7 @@ public class TMDbClient implements MovieIdentificationService { } } - return new MovieInfo(fields, genres, spokenLanguages, cast, trailers); + return new MovieInfo(fields, alternativeTitles, genres, spokenLanguages, cast, trailers); } public List getArtwork(String id) throws IOException { @@ -300,6 +340,7 @@ public class TMDbClient implements MovieIdentificationService { protected Map fields; + protected String[] alternativeTitles; protected String[] genres; protected String[] spokenLanguages; @@ -310,8 +351,9 @@ public class TMDbClient implements MovieIdentificationService { // used by serializer } - protected MovieInfo(Map fields, List genres, List spokenLanguages, List people, List trailers) { + protected MovieInfo(Map fields, List alternativeTitles, List genres, List spokenLanguages, List people, List trailers) { this.fields = new EnumMap(fields); + this.alternativeTitles = alternativeTitles.toArray(new String[0]); this.genres = genres.toArray(new String[0]); this.spokenLanguages = spokenLanguages.toArray(new String[0]); this.people = people.toArray(new Person[0]); @@ -476,6 +518,10 @@ public class TMDbClient implements MovieIdentificationService { return unmodifiableList(asList(trailers)); } + public List getAlternativeTitles() { + return unmodifiableList(asList(alternativeTitles)); + } + @Override public String toString() { return fields.toString(); diff --git a/website/data/query-blacklist.txt b/website/data/query-blacklist.txt index 36f52031..bd597174 100644 --- a/website/data/query-blacklist.txt +++ b/website/data/query-blacklist.txt @@ -26,6 +26,7 @@ ^AUDIO_TS$ ^bad$ ^BDMV$ +^Best$ ^Big$ ^clean$ ^cleaned$ @@ -111,6 +112,7 @@ ^temporary$ ^test$ ^testdata$ +^The.Best$ ^tmp$ ^to$ ^Torrent$ @@ -233,6 +235,7 @@ LMAO lol Los.Sustitutos M.HD +MegaPack mkvonly Movie.Pack mpg$