* IMDb page scraper helper for people who really need it
This commit is contained in:
parent
c8b6485d2a
commit
6217589f82
|
@ -47,7 +47,7 @@ String.metaClass.pad = Number.metaClass.pad = { length = 2, padding = "0" -> del
|
|||
* Return a substring matching the given pattern or break.
|
||||
*/
|
||||
String.metaClass.match = { String pattern, matchGroup = null ->
|
||||
def matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.MULTILINE).matcher(delegate)
|
||||
def matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.MULTILINE | Pattern.DOTALL).matcher(delegate)
|
||||
if (matcher.find())
|
||||
return matcher.groupCount() > 0 && matchGroup == null ? matcher.group(1) : matcher.group(matchGroup ?: 0)
|
||||
else
|
||||
|
|
|
@ -155,6 +155,11 @@ public class IMDbClient implements MovieIdentificationService {
|
|||
}
|
||||
|
||||
|
||||
public String scrape(String imdbid, String xpath) throws IOException, SAXException {
|
||||
return selectString(xpath, parsePage(getMoviePageLink(getImdbId(imdbid)).toURL())); // helper for scraping data in user scripts
|
||||
}
|
||||
|
||||
|
||||
public URI getMoviePageLink(int imdbId) {
|
||||
return URI.create(String.format("http://www.imdb.com/title/tt%07d/", imdbId));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue