* Fix scraper issues with (I), (II), etc. tags
This commit is contained in:
parent
cc57b89840
commit
3f2499fbea
|
@ -115,7 +115,7 @@ public class IMDbClient implements MovieIdentificationService {
|
|||
return null;
|
||||
|
||||
String name = selectString("//H1/text()", dom).replaceAll("\\s+", " ").trim();
|
||||
String year = new Scanner(selectNode("//H1/SPAN", dom).getTextContent()).useDelimiter("\\D+").next();
|
||||
String year = new Scanner(selectNode("//H1/SPAN[@class='nobr']", dom).getTextContent()).useDelimiter("\\D+").next();
|
||||
int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom));
|
||||
|
||||
return new Movie(name, Pattern.matches("\\d{4}", year) ? Integer.parseInt(year) : -1, imdbid, -1);
|
||||
|
|
|
@ -123,6 +123,16 @@ public class IMDbClientTest {
|
|||
}
|
||||
|
||||
|
||||
// Looks up IMDb id 369702 with a null locale and checks that the returned
// Movie reports the name "The Sea Inside", the year 2004 and the same IMDb id.
// NOTE(review): presumably the regression case for the "(I)"/"(II)" tag fix
// named in this commit's title — confirm against the live IMDb page markup.
@Test
|
||||
public void getMovieDescriptor4() throws Exception {
|
||||
Movie movie = imdb.getMovieDescriptor(369702, null);
|
||||
|
||||
assertEquals("The Sea Inside", movie.getName());
|
||||
assertEquals(2004, movie.getYear());
|
||||
assertEquals(369702, movie.getImdbId(), 0);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void getAkaMovieDescriptor() throws Exception {
|
||||
Movie movie = imdb.getMovieDescriptor(106559, Locale.ENGLISH);
|
||||
|
|
Loading…
Reference in New Issue