* IMDb page scraper helper for people who really need it

This commit is contained in:
Reinhard Pointner 2013-07-24 04:59:13 +00:00
parent c8b6485d2a
commit 6217589f82
2 changed files with 6 additions and 1 deletion

View File

@@ -47,7 +47,7 @@ String.metaClass.pad = Number.metaClass.pad = { length = 2, padding = "0" -> del
* Return a substring matching the given pattern or break.
*/
String.metaClass.match = { String pattern, matchGroup = null ->
def matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.MULTILINE).matcher(delegate)
def matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.MULTILINE | Pattern.DOTALL).matcher(delegate)
if (matcher.find())
return matcher.groupCount() > 0 && matchGroup == null ? matcher.group(1) : matcher.group(matchGroup ?: 0)
else

View File

@@ -155,6 +155,11 @@ public class IMDbClient implements MovieIdentificationService {
}
/**
 * Helper for scraping data in user scripts: evaluates an XPath expression against the IMDb title page of the given movie.
 *
 * The id argument is first passed through {@code getImdbId}, the resulting numeric id is turned into a page link via
 * {@link #getMoviePageLink(int)}, the page behind that link is parsed with {@code parsePage}, and the parsed page is
 * queried with {@code selectString}.
 *
 * @param imdbid IMDb id in whatever textual form {@code getImdbId} accepts
 * @param xpath XPath expression to evaluate against the parsed page
 * @return the value produced by {@code selectString} for the given expression
 * @throws IOException if the page link cannot be converted to a URL, or (presumably) if fetching the page fails
 * @throws SAXException presumably raised by {@code parsePage} when the page cannot be parsed
 */
public String scrape(String imdbid, String xpath) throws IOException, SAXException {
	final int numericId = getImdbId(imdbid);
	final java.net.URL moviePage = getMoviePageLink(numericId).toURL();

	return selectString(xpath, parsePage(moviePage));
}
/**
 * Builds the IMDb title page address for a numeric IMDb id.
 *
 * The id is zero-padded to at least seven digits and appended to the "tt" prefix, e.g. {@code 133093} yields
 * {@code http://www.imdb.com/title/tt0133093/}. Ids with more than seven digits are emitted unpadded.
 *
 * @param imdbId numeric IMDb id without the "tt" prefix
 * @return the URI of the title page
 */
public URI getMoviePageLink(int imdbId) {
	// Format with Locale.ROOT: the default-locale overload of String.format localizes %d and may emit
	// non-ASCII digits (e.g. Thai or Arabic-Indic), which would silently produce a wrong URL.
	return URI.create(String.format(java.util.Locale.ROOT, "http://www.imdb.com/title/tt%07d/", imdbId));
}