* fix TV Movie Lookup

This commit is contained in:
Reinhard Pointner 2013-01-27 09:56:40 +00:00
parent 2e68365b6e
commit cc57b89840
2 changed files with 16 additions and 9 deletions

View File

@ -41,6 +41,9 @@ import org.xml.sax.SAXException;
public class IMDbClient implements MovieIdentificationService { public class IMDbClient implements MovieIdentificationService {
private String host = "www.imdb.com";
@Override @Override
public String getName() { public String getName() {
return "IMDb"; return "IMDb";
@ -53,12 +56,6 @@ public class IMDbClient implements MovieIdentificationService {
} }
/**
 * Resolves the host name to use for IMDb.
 *
 * @return the value of the {@code imdb.hostname} system property when it is set,
 *         otherwise {@code "imdb.com"}
 */
protected String getHost() {
    // The property key is "imdb.hostname" (override with -Dimdb.hostname=...); the fallback is "imdb.com".
    return System.getProperty("imdb.hostname", "imdb.com");
}
protected int getImdbId(String link) { protected int getImdbId(String link) {
Matcher matcher = Pattern.compile("tt(\\d{7})").matcher(link); Matcher matcher = Pattern.compile("tt(\\d{7})").matcher(link);
@ -73,7 +70,7 @@ public class IMDbClient implements MovieIdentificationService {
@Override @Override
public List<Movie> searchMovie(String query, Locale locale) throws Exception { public List<Movie> searchMovie(String query, Locale locale) throws Exception {
Document dom = parsePage(new URL("http", getHost(), "/find?s=tt&q=" + encode(query, false))); Document dom = parsePage(new URL("http", host, "/find?s=tt&q=" + encode(query, false)));
// select movie links followed by year in parenthesis // select movie links followed by year in parenthesis
List<Node> nodes = selectNodes("//TABLE[@class='findList']//TD/A[substring-after(substring-before(following::text(),')'),'(')]", dom); List<Node> nodes = selectNodes("//TABLE[@class='findList']//TD/A[substring-after(substring-before(following::text(),')'),'(')]", dom);
@ -118,7 +115,7 @@ public class IMDbClient implements MovieIdentificationService {
return null; return null;
String name = selectString("//H1/text()", dom).replaceAll("\\s+", " ").trim(); String name = selectString("//H1/text()", dom).replaceAll("\\s+", " ").trim();
String year = new Scanner(selectString("//H1//A/text()", dom)).useDelimiter("\\D+").next(); String year = new Scanner(selectNode("//H1/SPAN", dom).getTextContent()).useDelimiter("\\D+").next();
int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom)); int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom));
return new Movie(name, Pattern.matches("\\d{4}", year) ? Integer.parseInt(year) : -1, imdbid, -1); return new Movie(name, Pattern.matches("\\d{4}", year) ? Integer.parseInt(year) : -1, imdbid, -1);
@ -132,7 +129,7 @@ public class IMDbClient implements MovieIdentificationService {
@Override @Override
public Movie getMovieDescriptor(int imdbid, Locale locale) throws Exception { public Movie getMovieDescriptor(int imdbid, Locale locale) throws Exception {
try { try {
return scrapeMovie(parsePage(new URL("http", getHost(), String.format("/title/tt%07d", imdbid))), locale); return scrapeMovie(parsePage(new URL("http", host, String.format("/title/tt%07d/", imdbid))), locale);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
return null; // illegal imdbid return null; // illegal imdbid
} }

View File

@ -113,6 +113,16 @@ public class IMDbClientTest {
} }
/**
 * Requests the descriptor for IMDb id 75610 without a locale and checks the
 * returned name, year and id.
 */
@Test
public void getMovieDescriptor3() throws Exception {
    final int imdbId = 75610;
    Movie descriptor = imdb.getMovieDescriptor(imdbId, null);

    assertEquals("21", descriptor.getName());
    assertEquals(1977, descriptor.getYear());
    assertEquals(imdbId, descriptor.getImdbId(), 0);
}
@Test @Test
public void getAkaMovieDescriptor() throws Exception { public void getAkaMovieDescriptor() throws Exception {
Movie movie = imdb.getMovieDescriptor(106559, Locale.ENGLISH); Movie movie = imdb.getMovieDescriptor(106559, Locale.ENGLISH);