+ support IMDb as movie db
This commit is contained in:
parent
5228c76dbc
commit
81533d0a66
|
@ -39,27 +39,27 @@ public final class WebServices {
|
||||||
// movie dbs
|
// movie dbs
|
||||||
public static final TMDbClient TMDb = new TMDbClient(getApplicationProperty("themoviedb.apikey"));
|
public static final TMDbClient TMDb = new TMDbClient(getApplicationProperty("themoviedb.apikey"));
|
||||||
|
|
||||||
|
|
||||||
public static EpisodeListProvider[] getEpisodeListProviders() {
|
public static EpisodeListProvider[] getEpisodeListProviders() {
|
||||||
return new EpisodeListProvider[] { TVRage, AniDB, IMDb, TheTVDB, Serienjunkies };
|
return new EpisodeListProvider[] { TVRage, AniDB, IMDb, TheTVDB, Serienjunkies };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static MovieIdentificationService[] getMovieIdentificationServices() {
|
public static MovieIdentificationService[] getMovieIdentificationServices() {
|
||||||
return new MovieIdentificationService[] { OpenSubtitles, TMDb };
|
return new MovieIdentificationService[] { OpenSubtitles, IMDb, TMDb };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static SubtitleProvider[] getSubtitleProviders() {
|
public static SubtitleProvider[] getSubtitleProviders() {
|
||||||
return new SubtitleProvider[] { OpenSubtitles, Sublight, Subscene };
|
return new SubtitleProvider[] { OpenSubtitles, Sublight, Subscene };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static VideoHashSubtitleService[] getVideoHashSubtitleServices() {
|
public static VideoHashSubtitleService[] getVideoHashSubtitleServices() {
|
||||||
return new VideoHashSubtitleService[] { OpenSubtitles, Sublight };
|
return new VideoHashSubtitleService[] { OpenSubtitles, Sublight };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static EpisodeListProvider getEpisodeListProvider(String name) {
|
public static EpisodeListProvider getEpisodeListProvider(String name) {
|
||||||
for (EpisodeListProvider it : WebServices.getEpisodeListProviders()) {
|
for (EpisodeListProvider it : WebServices.getEpisodeListProviders()) {
|
||||||
if (it.getName().equalsIgnoreCase(name))
|
if (it.getName().equalsIgnoreCase(name))
|
||||||
|
@ -69,7 +69,7 @@ public final class WebServices {
|
||||||
return null; // default
|
return null; // default
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static MovieIdentificationService getMovieIdentificationService(String name) {
|
public static MovieIdentificationService getMovieIdentificationService(String name) {
|
||||||
for (MovieIdentificationService it : getMovieIdentificationServices()) {
|
for (MovieIdentificationService it : getMovieIdentificationServices()) {
|
||||||
if (it.getName().equalsIgnoreCase(name))
|
if (it.getName().equalsIgnoreCase(name))
|
||||||
|
@ -79,7 +79,7 @@ public final class WebServices {
|
||||||
return null; // default
|
return null; // default
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dummy constructor to prevent instantiation.
|
* Dummy constructor to prevent instantiation.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -5,6 +5,7 @@ package net.sourceforge.filebot.web;
|
||||||
import static net.sourceforge.filebot.web.WebRequest.*;
|
import static net.sourceforge.filebot.web.WebRequest.*;
|
||||||
import static net.sourceforge.tuned.XPathUtilities.*;
|
import static net.sourceforge.tuned.XPathUtilities.*;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
|
@ -27,36 +28,34 @@ import net.sf.ehcache.CacheManager;
|
||||||
import net.sourceforge.filebot.ResourceManager;
|
import net.sourceforge.filebot.ResourceManager;
|
||||||
|
|
||||||
|
|
||||||
public class IMDbClient extends AbstractEpisodeListProvider {
|
public class IMDbClient extends AbstractEpisodeListProvider implements MovieIdentificationService {
|
||||||
|
|
||||||
private final String host = "www.imdb.com";
|
private final String host = "www.imdb.com";
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getName() {
|
public String getName() {
|
||||||
return "IMDb";
|
return "IMDb";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Icon getIcon() {
|
public Icon getIcon() {
|
||||||
return ResourceManager.getIcon("search.imdb");
|
return ResourceManager.getIcon("search.imdb");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ResultCache getCache() {
|
public ResultCache getCache() {
|
||||||
return new ResultCache(host, CacheManager.getInstance().getCache("web-datasource"));
|
return new ResultCache(host, CacheManager.getInstance().getCache("web-datasource"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<SearchResult> fetchSearchResult(String query, Locale locale) throws IOException, SAXException {
|
public List<SearchResult> fetchSearchResult(String query, Locale locale) throws IOException, SAXException {
|
||||||
URL searchUrl = new URL("http", host, "/find?s=tt&q=" + encode(query));
|
Document dom = parsePage(new URL("http", host, "/find?s=tt&q=" + encode(query)));
|
||||||
Document dom = getHtmlDocument(openConnection(searchUrl));
|
|
||||||
|
|
||||||
List<Node> nodes = selectNodes("//TABLE//A[following-sibling::SMALL[contains(.,'series')]]", dom);
|
List<Node> nodes = selectNodes("//TABLE//A[following-sibling::SMALL[contains(.,'series')]]", dom);
|
||||||
|
|
||||||
List<SearchResult> results = new ArrayList<SearchResult>(nodes.size());
|
List<SearchResult> results = new ArrayList<SearchResult>(nodes.size());
|
||||||
|
|
||||||
for (Node node : nodes) {
|
for (Node node : nodes) {
|
||||||
|
@ -69,25 +68,20 @@ public class IMDbClient extends AbstractEpisodeListProvider {
|
||||||
|
|
||||||
// we might have been redirected to the movie page
|
// we might have been redirected to the movie page
|
||||||
if (results.isEmpty()) {
|
if (results.isEmpty()) {
|
||||||
try {
|
Movie movie = scrapeMovie(dom);
|
||||||
String name = normalizeName(selectString("//H1/text()", dom));
|
if (movie != null) {
|
||||||
String year = new Scanner(selectString("//H1//SPAN", dom)).useDelimiter("\\D+").next();
|
results.add(movie);
|
||||||
String url = selectString("//LINK[@rel='canonical']/@href", dom);
|
|
||||||
|
|
||||||
results.add(new Movie(name, Integer.parseInt(year), getImdbId(url)));
|
|
||||||
} catch (Exception e) {
|
|
||||||
// ignore, we probably got redirected to an error page
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<Episode> fetchEpisodeList(SearchResult searchResult, Locale locale) throws IOException, SAXException {
|
public List<Episode> fetchEpisodeList(SearchResult searchResult, Locale locale) throws IOException, SAXException {
|
||||||
Movie movie = (Movie) searchResult;
|
Movie movie = (Movie) searchResult;
|
||||||
Document dom = getHtmlDocument(openConnection(getEpisodeListLink(searchResult).toURL()));
|
Document dom = parsePage(getEpisodeListLink(searchResult).toURL());
|
||||||
|
|
||||||
String seriesName = normalizeName(selectString("//H1/A", dom));
|
String seriesName = normalizeName(selectString("//H1/A", dom));
|
||||||
Date year = new Date(movie.getYear(), 0, 0);
|
Date year = new Date(movie.getYear(), 0, 0);
|
||||||
|
@ -111,23 +105,13 @@ public class IMDbClient extends AbstractEpisodeListProvider {
|
||||||
return episodes;
|
return episodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected URLConnection openConnection(URL url) throws IOException {
|
|
||||||
URLConnection connection = url.openConnection();
|
|
||||||
|
|
||||||
// IMDb refuses default user agent (Java/1.6.0_12)
|
|
||||||
connection.addRequestProperty("User-Agent", "Scraper");
|
|
||||||
|
|
||||||
return connection;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected String normalizeName(String name) {
|
protected String normalizeName(String name) {
|
||||||
// remove quotation marks
|
// remove quotation marks
|
||||||
return name.replaceAll("\"", "");
|
return name.replaceAll("\"", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected int getImdbId(String link) {
|
protected int getImdbId(String link) {
|
||||||
Matcher matcher = Pattern.compile("tt(\\d{7})").matcher(link);
|
Matcher matcher = Pattern.compile("tt(\\d{7})").matcher(link);
|
||||||
|
|
||||||
|
@ -139,13 +123,13 @@ public class IMDbClient extends AbstractEpisodeListProvider {
|
||||||
throw new IllegalArgumentException(String.format("Cannot find imdb id: %s", link));
|
throw new IllegalArgumentException(String.format("Cannot find imdb id: %s", link));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URI getEpisodeListLink(SearchResult searchResult) {
|
public URI getEpisodeListLink(SearchResult searchResult) {
|
||||||
return getEpisodeListLink(searchResult, 0);
|
return getEpisodeListLink(searchResult, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URI getEpisodeListLink(SearchResult searchResult, int season) {
|
public URI getEpisodeListLink(SearchResult searchResult, int season) {
|
||||||
try {
|
try {
|
||||||
|
@ -154,4 +138,72 @@ public class IMDbClient extends AbstractEpisodeListProvider {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Movie> searchMovie(String query, Locale locale) throws Exception {
|
||||||
|
Document dom = parsePage(new URL("http", host, "/find?s=tt&q=" + encode(query)));
|
||||||
|
|
||||||
|
// select movie links followed by year in parenthesis
|
||||||
|
List<Node> nodes = selectNodes("//TABLE//A[string-length(substring-after(substring-before(following::text(),')'),'(')) = 4 and count(following-sibling::SMALL) = 0]", dom);
|
||||||
|
List<Movie> results = new ArrayList<Movie>(nodes.size());
|
||||||
|
|
||||||
|
for (Node node : nodes) {
|
||||||
|
String name = node.getTextContent().trim();
|
||||||
|
String year = node.getNextSibling().getTextContent().trim().replaceAll("[\\p{Punct}\\p{Space}]+", ""); // remove non-number characters
|
||||||
|
String href = getAttribute("href", node);
|
||||||
|
|
||||||
|
try {
|
||||||
|
results.add(new Movie(name, Integer.parseInt(year), getImdbId(href)));
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
// ignore illegal movies (TV Shows, Videos, Video Games, etc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we might have been redirected to the movie page
|
||||||
|
if (results.isEmpty()) {
|
||||||
|
Movie movie = scrapeMovie(dom);
|
||||||
|
if (movie != null) {
|
||||||
|
results.add(movie);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected Movie scrapeMovie(Document dom) {
|
||||||
|
try {
|
||||||
|
String name = normalizeName(selectString("//H1/text()", dom));
|
||||||
|
String year = new Scanner(selectString("//H1//SPAN", dom)).useDelimiter("\\D+").next();
|
||||||
|
String url = selectString("//LINK[@rel='canonical']/@href", dom);
|
||||||
|
return new Movie(name, Integer.parseInt(year), getImdbId(url));
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore, we probably got redirected to an error page
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Movie getMovieDescriptor(int imdbid, Locale locale) throws Exception {
|
||||||
|
return scrapeMovie(parsePage(new URL("http", host, String.format("/title/tt%07d/", imdbid))));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected Document parsePage(URL url) throws IOException, SAXException {
|
||||||
|
URLConnection connection = url.openConnection();
|
||||||
|
|
||||||
|
// IMDb refuses default user agent (Java/1.6.0_12)
|
||||||
|
connection.addRequestProperty("User-Agent", "Mozilla");
|
||||||
|
|
||||||
|
return getHtmlDocument(connection);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Movie[] getMovieDescriptors(File[] movieFiles, Locale locale) throws Exception {
|
||||||
|
return new Movie[movieFiles.length]; // UNSUPPORTED OPERATION => EMPTY RESULT
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,7 +98,11 @@ public final class WebRequest {
|
||||||
|
|
||||||
public static Document getDocument(InputSource source) throws IOException, SAXException {
|
public static Document getDocument(InputSource source) throws IOException, SAXException {
|
||||||
try {
|
try {
|
||||||
return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(source);
|
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||||
|
factory.setValidating(false);
|
||||||
|
factory.setFeature("http://xml.org/sax/features/namespaces", false);
|
||||||
|
factory.setFeature("http://xml.org/sax/features/validation", false);
|
||||||
|
return factory.newDocumentBuilder().parse(source);
|
||||||
} catch (ParserConfigurationException e) {
|
} catch (ParserConfigurationException e) {
|
||||||
// will never happen
|
// will never happen
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
|
|
@ -13,7 +13,7 @@ public class IMDbClientTest {
|
||||||
|
|
||||||
private final IMDbClient imdb = new IMDbClient();
|
private final IMDbClient imdb = new IMDbClient();
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void search() throws Exception {
|
public void search() throws Exception {
|
||||||
List<SearchResult> results = imdb.search("battlestar");
|
List<SearchResult> results = imdb.search("battlestar");
|
||||||
|
@ -27,7 +27,7 @@ public class IMDbClientTest {
|
||||||
assertEquals(8, results.size(), 0);
|
assertEquals(8, results.size(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void searchMiniSeries() throws Exception {
|
public void searchMiniSeries() throws Exception {
|
||||||
List<SearchResult> results = imdb.search("generation kill");
|
List<SearchResult> results = imdb.search("generation kill");
|
||||||
|
@ -39,7 +39,7 @@ public class IMDbClientTest {
|
||||||
assertEquals(995832, movie.getImdbId(), 0);
|
assertEquals(995832, movie.getImdbId(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void searchNoMatch() throws Exception {
|
public void searchNoMatch() throws Exception {
|
||||||
List<SearchResult> results = imdb.search("i will not find anything for this query string");
|
List<SearchResult> results = imdb.search("i will not find anything for this query string");
|
||||||
|
@ -47,7 +47,7 @@ public class IMDbClientTest {
|
||||||
assertTrue(results.isEmpty());
|
assertTrue(results.isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void searchResultPageRedirect() throws Exception {
|
public void searchResultPageRedirect() throws Exception {
|
||||||
List<SearchResult> results = imdb.search("my name is earl");
|
List<SearchResult> results = imdb.search("my name is earl");
|
||||||
|
@ -61,7 +61,7 @@ public class IMDbClientTest {
|
||||||
assertEquals(460091, movie.getImdbId(), 0);
|
assertEquals(460091, movie.getImdbId(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeList() throws Exception {
|
public void getEpisodeList() throws Exception {
|
||||||
List<Episode> list = imdb.getEpisodeList(new Movie("Buffy", 1997, 118276));
|
List<Episode> list = imdb.getEpisodeList(new Movie("Buffy", 1997, 118276));
|
||||||
|
@ -87,7 +87,7 @@ public class IMDbClientTest {
|
||||||
assertEquals("2003-05-20", last.airdate().toString());
|
assertEquals("2003-05-20", last.airdate().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeListWithUnknownSeason() throws Exception {
|
public void getEpisodeListWithUnknownSeason() throws Exception {
|
||||||
List<Episode> list = imdb.getEpisodeList(new Movie("Mushishi", 2005, 807832));
|
List<Episode> list = imdb.getEpisodeList(new Movie("Mushishi", 2005, 807832));
|
||||||
|
@ -103,7 +103,43 @@ public class IMDbClientTest {
|
||||||
assertEquals("1", first.getSeason().toString());
|
assertEquals("1", first.getSeason().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void searchMovie() throws Exception {
|
||||||
|
List<Movie> results = imdb.searchMovie("Avatar", null);
|
||||||
|
|
||||||
|
assertEquals(26, results.size());
|
||||||
|
Movie movie = (Movie) results.get(0);
|
||||||
|
|
||||||
|
assertEquals("Avatar", movie.getName());
|
||||||
|
assertEquals(2009, movie.getYear());
|
||||||
|
assertEquals(499549, movie.getImdbId(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void searchMovieRedirect() throws Exception {
|
||||||
|
List<Movie> results = imdb.searchMovie("battle angel alita", null);
|
||||||
|
|
||||||
|
assertEquals(1, results.size());
|
||||||
|
Movie movie = (Movie) results.get(0);
|
||||||
|
|
||||||
|
assertEquals("Battle Angel", movie.getName());
|
||||||
|
assertEquals(1993, movie.getYear());
|
||||||
|
assertEquals(107061, movie.getImdbId(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void getMovieDescriptor() throws Exception {
|
||||||
|
Movie movie = imdb.getMovieDescriptor(499549, null);
|
||||||
|
|
||||||
|
assertEquals("Avatar", movie.getName());
|
||||||
|
assertEquals(2009, movie.getYear());
|
||||||
|
assertEquals(499549, movie.getImdbId(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeListLink() throws Exception {
|
public void getEpisodeListLink() throws Exception {
|
||||||
assertEquals("http://www.imdb.com/title/tt0407362/episodes", imdb.getEpisodeListLink(new Movie("Battlestar Galactica", 2004, 407362)).toString());
|
assertEquals("http://www.imdb.com/title/tt0407362/episodes", imdb.getEpisodeListLink(new Movie("Battlestar Galactica", 2004, 407362)).toString());
|
||||||
|
|
|
@ -12,6 +12,7 @@ PROPER
|
||||||
READNFO
|
READNFO
|
||||||
REPACK
|
REPACK
|
||||||
RETAIL
|
RETAIL
|
||||||
|
HDRip
|
||||||
sample[s]?$
|
sample[s]?$
|
||||||
ShareReactor
|
ShareReactor
|
||||||
ShareZONE
|
ShareZONE
|
||||||
|
|
Loading…
Reference in New Issue