* adapt tv.com scraper to site changes
This commit is contained in:
parent
afbc49369a
commit
57df9b1fbc
|
@ -62,16 +62,14 @@ public class TVDotComClient implements EpisodeListProvider {
|
|||
|
||||
Document dom = getHtmlDocument(searchUrl);
|
||||
|
||||
List<Node> nodes = selectNodes("//*[@class='title']//descendant-or-self::A", dom);
|
||||
List<SearchResult> searchResults = new ArrayList<SearchResult>();
|
||||
|
||||
List<SearchResult> searchResults = new ArrayList<SearchResult>(nodes.size());
|
||||
|
||||
for (Node node : nodes) {
|
||||
for (Node node : selectNodes("//H2/A", dom)) {
|
||||
String title = getTextContent(node);
|
||||
String href = getAttribute("href", node);
|
||||
|
||||
try {
|
||||
URL episodeGuideLocation = new URL(href.replaceAll("summary\\.html\\?.*", "episode.html"));
|
||||
URL episodeGuideLocation = new URL(href.replaceAll("summary[.]html[?].*", "episode.html"));
|
||||
searchResults.add(new HyperLink(title, episodeGuideLocation));
|
||||
} catch (MalformedURLException e) {
|
||||
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href, e);
|
||||
|
|
|
@ -26,10 +26,10 @@ public class TVDotComClientTest {
|
|||
|
||||
@Test
|
||||
public void search() throws Exception {
|
||||
List<SearchResult> results = tvdotcom.search("Buffy");
|
||||
List<SearchResult> results = tvdotcom.search("buffy the vampire slayer");
|
||||
|
||||
// if this fails, there is probably a problem with the xpath query
|
||||
assertEquals(10, results.size());
|
||||
assertEquals(4, results.size());
|
||||
|
||||
HyperLink result = (HyperLink) results.get(0);
|
||||
|
||||
|
|
Loading…
Reference in New Issue