* adapt tv.com scraper to site changes

This commit is contained in:
Reinhard Pointner 2009-07-25 13:36:29 +00:00
parent afbc49369a
commit 57df9b1fbc
2 changed files with 5 additions and 7 deletions

View File

@@ -62,16 +62,14 @@ public class TVDotComClient implements EpisodeListProvider {
Document dom = getHtmlDocument(searchUrl); Document dom = getHtmlDocument(searchUrl);
List<Node> nodes = selectNodes("//*[@class='title']//descendant-or-self::A", dom); List<SearchResult> searchResults = new ArrayList<SearchResult>();
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size()); for (Node node : selectNodes("//H2/A", dom)) {
for (Node node : nodes) {
String title = getTextContent(node); String title = getTextContent(node);
String href = getAttribute("href", node); String href = getAttribute("href", node);
try { try {
URL episodeGuideLocation = new URL(href.replaceAll("summary\\.html\\?.*", "episode.html")); URL episodeGuideLocation = new URL(href.replaceAll("summary[.]html[?].*", "episode.html"));
searchResults.add(new HyperLink(title, episodeGuideLocation)); searchResults.add(new HyperLink(title, episodeGuideLocation));
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href, e); Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href, e);

View File

@@ -26,10 +26,10 @@ public class TVDotComClientTest {
@Test @Test
public void search() throws Exception { public void search() throws Exception {
List<SearchResult> results = tvdotcom.search("Buffy"); List<SearchResult> results = tvdotcom.search("buffy the vampire slayer");
// if this fails, there is probably a problem with the xpath query // if this fails, there is probably a problem with the xpath query
assertEquals(10, results.size()); assertEquals(4, results.size());
HyperLink result = (HyperLink) results.get(0); HyperLink result = (HyperLink) results.get(0);