* Fixed some corner-case issues with the IMDb scraper
This commit is contained in:
parent
09ec7c9dfb
commit
0861220aed
|
@ -2,6 +2,7 @@
|
||||||
package net.sourceforge.filebot.web;
|
package net.sourceforge.filebot.web;
|
||||||
|
|
||||||
|
|
||||||
|
import static java.util.Arrays.*;
|
||||||
import static java.util.Collections.*;
|
import static java.util.Collections.*;
|
||||||
import static net.sourceforge.filebot.web.WebRequest.*;
|
import static net.sourceforge.filebot.web.WebRequest.*;
|
||||||
import static net.sourceforge.tuned.XPathUtilities.*;
|
import static net.sourceforge.tuned.XPathUtilities.*;
|
||||||
|
@ -92,10 +93,15 @@ public class IMDbClient implements MovieIdentificationService {
|
||||||
|
|
||||||
// we might have been redirected to the movie page
|
// we might have been redirected to the movie page
|
||||||
if (results.isEmpty()) {
|
if (results.isEmpty()) {
|
||||||
Movie movie = scrapeMovie(dom, locale);
|
try {
|
||||||
if (movie != null) {
|
int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom));
|
||||||
|
Movie movie = getMovieDescriptor(imdbid, locale);
|
||||||
|
if (movie == null) {
|
||||||
results.add(movie);
|
results.add(movie);
|
||||||
}
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore, can't find movie
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
|
@ -115,7 +121,7 @@ public class IMDbClient implements MovieIdentificationService {
|
||||||
// try to get localized name
|
// try to get localized name
|
||||||
if (locale != null && locale != Locale.ROOT) {
|
if (locale != null && locale != Locale.ROOT) {
|
||||||
try {
|
try {
|
||||||
String languageName = locale.getDisplayLanguage(Locale.ENGLISH).toLowerCase();
|
String language = String.format("(%s title)", locale.getDisplayLanguage(Locale.ENGLISH).toLowerCase());
|
||||||
List<Node> akaRows = selectNodes("//*[@name='akas']//following::TABLE[1]//TR", dom);
|
List<Node> akaRows = selectNodes("//*[@name='akas']//following::TABLE[1]//TR", dom);
|
||||||
|
|
||||||
for (Node aka : akaRows) {
|
for (Node aka : akaRows) {
|
||||||
|
@ -123,7 +129,7 @@ public class IMDbClient implements MovieIdentificationService {
|
||||||
String akaTitle = getTextContent(columns.get(0));
|
String akaTitle = getTextContent(columns.get(0));
|
||||||
String languageDesc = getTextContent(columns.get(1)).toLowerCase();
|
String languageDesc = getTextContent(columns.get(1)).toLowerCase();
|
||||||
|
|
||||||
if (languageName.length() > 0 && languageDesc.contains(languageName)) {
|
if (language.length() > 0 && languageDesc.contains(language) && frequency(asList(languageDesc.split("\\W")), "title") == 1) {
|
||||||
name = akaTitle;
|
name = akaTitle;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -207,7 +213,6 @@ public class IMDbClient implements MovieIdentificationService {
|
||||||
Map<MovieProperty, String> fields = new EnumMap<MovieProperty, String>(MovieProperty.class);
|
Map<MovieProperty, String> fields = new EnumMap<MovieProperty, String>(MovieProperty.class);
|
||||||
fields.put(MovieProperty.name, data.get("title"));
|
fields.put(MovieProperty.name, data.get("title"));
|
||||||
fields.put(MovieProperty.certification, data.get("rated"));
|
fields.put(MovieProperty.certification, data.get("rated"));
|
||||||
fields.put(MovieProperty.released, Date.parse(data.get("released"), "dd MMM yyyy").toString());
|
|
||||||
fields.put(MovieProperty.tagline, data.get("plot"));
|
fields.put(MovieProperty.tagline, data.get("plot"));
|
||||||
fields.put(MovieProperty.rating, data.get("imdbRating"));
|
fields.put(MovieProperty.rating, data.get("imdbRating"));
|
||||||
fields.put(MovieProperty.votes, data.get("imdbVotes").replaceAll("\\D", ""));
|
fields.put(MovieProperty.votes, data.get("imdbVotes").replaceAll("\\D", ""));
|
||||||
|
|
|
@ -59,4 +59,14 @@ public class IMDbClientTest {
|
||||||
assertEquals(106559, movie.getImdbId(), 0);
|
assertEquals(106559, movie.getImdbId(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void getAkaMovieDescriptorExtra() throws Exception {
|
||||||
|
Movie movie = imdb.getMovieDescriptor(470761, Locale.ENGLISH);
|
||||||
|
|
||||||
|
assertEquals("First Born", movie.getName());
|
||||||
|
assertEquals(2007, movie.getYear());
|
||||||
|
assertEquals(470761, movie.getImdbId(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue