* Fix scraper issues with (I), (II), etc. tags
This commit is contained in:
parent
cc57b89840
commit
3f2499fbea
|
@ -115,7 +115,7 @@ public class IMDbClient implements MovieIdentificationService {
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
String name = selectString("//H1/text()", dom).replaceAll("\\s+", " ").trim();
|
String name = selectString("//H1/text()", dom).replaceAll("\\s+", " ").trim();
|
||||||
String year = new Scanner(selectNode("//H1/SPAN", dom).getTextContent()).useDelimiter("\\D+").next();
|
String year = new Scanner(selectNode("//H1/SPAN[@class='nobr']", dom).getTextContent()).useDelimiter("\\D+").next();
|
||||||
int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom));
|
int imdbid = getImdbId(selectString("//LINK[@rel='canonical']/@href", dom));
|
||||||
|
|
||||||
return new Movie(name, Pattern.matches("\\d{4}", year) ? Integer.parseInt(year) : -1, imdbid, -1);
|
return new Movie(name, Pattern.matches("\\d{4}", year) ? Integer.parseInt(year) : -1, imdbid, -1);
|
||||||
|
|
|
@ -123,6 +123,16 @@ public class IMDbClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void getMovieDescriptor4() throws Exception {
|
||||||
|
Movie movie = imdb.getMovieDescriptor(369702, null);
|
||||||
|
|
||||||
|
assertEquals("The Sea Inside", movie.getName());
|
||||||
|
assertEquals(2004, movie.getYear());
|
||||||
|
assertEquals(369702, movie.getImdbId(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getAkaMovieDescriptor() throws Exception {
|
public void getAkaMovieDescriptor() throws Exception {
|
||||||
Movie movie = imdb.getMovieDescriptor(106559, Locale.ENGLISH);
|
Movie movie = imdb.getMovieDescriptor(106559, Locale.ENGLISH);
|
||||||
|
|
Loading…
Reference in New Issue