From 6074680401cb86e647a9d395687e8b4b323927ad Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Thu, 11 Aug 2011 10:52:17 +0000 Subject: [PATCH] + dropped support for TV.com scraper (broken due to site changes) --- .../net/sourceforge/filebot/WebServices.java | 4 +- .../filebot/resources/search.tvdotcom.png | Bin 722 -> 0 bytes .../filebot/web/TVDotComClient.java | 200 ------------------ .../filebot/web/TVDotComClientTest.java | 134 ------------ .../sourceforge/filebot/web/WebTestSuite.java | 4 +- 5 files changed, 3 insertions(+), 339 deletions(-) delete mode 100644 source/net/sourceforge/filebot/resources/search.tvdotcom.png delete mode 100644 source/net/sourceforge/filebot/web/TVDotComClient.java delete mode 100644 test/net/sourceforge/filebot/web/TVDotComClientTest.java diff --git a/source/net/sourceforge/filebot/WebServices.java b/source/net/sourceforge/filebot/WebServices.java index be325f85..54fcb2ed 100644 --- a/source/net/sourceforge/filebot/WebServices.java +++ b/source/net/sourceforge/filebot/WebServices.java @@ -12,7 +12,6 @@ import net.sourceforge.filebot.web.SerienjunkiesClient; import net.sourceforge.filebot.web.SublightSubtitleClient; import net.sourceforge.filebot.web.SubsceneSubtitleClient; import net.sourceforge.filebot.web.SubtitleProvider; -import net.sourceforge.filebot.web.TVDotComClient; import net.sourceforge.filebot.web.TVRageClient; import net.sourceforge.filebot.web.TheTVDBClient; import net.sourceforge.filebot.web.VideoHashSubtitleService; @@ -26,7 +25,6 @@ public final class WebServices { // episode dbs public static final TVRageClient TVRage = new TVRageClient(); public static final AnidbClient AniDB = new AnidbClient("filebot", 1); - public static final TVDotComClient TVDotCom = new TVDotComClient(); public static final IMDbClient IMDb = new IMDbClient(); public static final TheTVDBClient TheTVDB = new TheTVDBClient(getApplicationProperty("thetvdb.apikey")); public static final SerienjunkiesClient Serienjunkies = new SerienjunkiesClient(getApplicationProperty("serienjunkies.apikey")); @@ -38,7 +36,7 @@ public final class WebServices { public static EpisodeListProvider[] getEpisodeListProviders() { - return new EpisodeListProvider[] { TVRage, AniDB, TVDotCom, IMDb, TheTVDB, Serienjunkies }; + return new EpisodeListProvider[] { TVRage, AniDB, IMDb, TheTVDB, Serienjunkies }; } diff --git a/source/net/sourceforge/filebot/resources/search.tvdotcom.png b/source/net/sourceforge/filebot/resources/search.tvdotcom.png deleted file mode 100644 index 6377b963c4d0ba4e157e8f61208ca542162bb67a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 722 zcmV;@0xkWCP)WdK)YWFR;oN_B1^GB7YT zATlsIFfckWFd!{3FfcG^bw4ry00I(8L_t(Ijir;{Yf@nt$3I8>0dqsnaZO8iY;xEX z3vm?LyvU1?qKhuQ2!h=N{TI94MeH`ZD|V6Bu?WFhL|`;GyqPY-((TBOuH%G;Gh=q4 zo@vkz*!Svr-{*b4&-Zm zZJnT%Ra`y~T3P*T=Dgl!aX~={++XP~E+|Y)*a6Ufni*>~QClayb6F*?w+Tx&@_HM9 zPF*|wLRhk)woVAT?f6^{E&{UCkJV=3bG5w;zj$l1MV{ z9g#Wduw%b>lh@n(q%t`Y5kCOwtr7t9foaU4Suc-P(*z=yMuMxgL*8wxB;z5Rjxic~ zgIm!#qCqdBB(nPaC6~juF`b`s0FcV$c)0Y8Y+(<8%D3+nzU%{FwOI(xxQPb6WD9$w zGC2U03nC`D6cet@$#m)(dA&_pYXT4rdJU@$FafxH9z*&d zD?lRRHxvP6w!RvqcM{hDFaZQ-+(vt}vP!jf$U*Z6fDfN)More&a|W3xiM=)g_*@Rd zk=NTiS<8*|e@Sluu(4Ay-d&rWg5y4c>>M{7ow~+a`gBk>>cCJS^yWZzj)U4KP}I2a z;O search(String query, Locale locale) throws IOException, SAXException { - // use ajax search request, because we don't need the whole search result page - URL searchUrl = new URL("http", host, "/search.php?type=Search&stype=ajax_search&search_type=program&qs=" + URLEncoder.encode(query, "UTF-8")); - - Document dom = getHtmlDocument(searchUrl); - - List searchResults = new ArrayList(); - - for (Node node : selectNodes("//H2/A", dom)) { - String title = getTextContent(node); - String href = getAttribute("href", node); - - try { - URL episodeGuideLocation = new URL(href.replaceAll("summary[.]html[?].*", "episode.html")); - searchResults.add(new HyperLink(title, episodeGuideLocation)); - } catch (MalformedURLException e) { - Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href, e); - } - } - - return searchResults; - } - - - @Override - public List getEpisodeList(final SearchResult searchResult, final Locale locale) throws Exception { - // get document for season 1 - Document dom = getHtmlDocument(getEpisodeListLink(searchResult, 1).toURL()); - - // seasons are ordered in reverse, first element is latest season - String latestSeasonString = selectString("id('episode_list_header')//*[contains(@class, 'number')]", dom); - - if (latestSeasonString.isEmpty()) { - // assume single season series - latestSeasonString = "1"; - } - - // strip unexpected characters from season string (e.g. "7..."); - int seasonCount = Integer.valueOf(latestSeasonString.replaceAll("\\D+", "")); - - // we're going to fetch the episode list for each season on multiple threads - List>> futures = new ArrayList>>(seasonCount); - - if (seasonCount > 1) { - // max. 12 threads so we don't get too many concurrent connections - ExecutorService executor = Executors.newFixedThreadPool(Math.min(seasonCount - 1, 12)); - - // we already have the document for season 1, start with season 2 - for (int i = 2; i <= seasonCount; i++) { - // season used in anonymous class - final int season = i; - - futures.add(executor.submit(new Callable>() { - - @Override - public List call() throws Exception { - return getEpisodeList(searchResult, season, locale); - } - })); - } - - // shutdown after all tasks are done - executor.shutdown(); - } - - List episodes = new ArrayList(25 * seasonCount); - - // get episode list from season 1 document - episodes.addAll(getEpisodeList(searchResult, dom)); - - // get episodes from executor threads - for (Future> future : futures) { - episodes.addAll(future.get()); - } - - return episodes; - } - - - @Override - public List getEpisodeList(SearchResult searchResult, int season, Locale locale) throws IOException, SAXException { - Document dom = getHtmlDocument(getEpisodeListLink(searchResult, season).toURL()); - return getEpisodeList(searchResult, dom); - } - - - private List getEpisodeList(SearchResult searchResult, Document dom) { - - List nodes = selectNodes("id('episode_guide_list')//*[@class='info']", dom); - - Pattern episodePattern = Pattern.compile("Season.(\\d+).+Episode.(\\d+)"); - Pattern airdatePattern = Pattern.compile("\\d{1,2}.\\d{1,2}.\\d{4}"); - - List episodes = new ArrayList(nodes.size()); - - for (Node node : nodes) { - String title = selectString("./H3/A/text()", node); - String meta = selectString("./*[@class='meta']", node).replaceAll("\\p{Space}+", " "); - - Integer season = null; - Integer episode = null; - Date airdate = null; - - Matcher m; - - // try to match episode information - if ((m = episodePattern.matcher(meta)).find()) { - // matches episode - season = new Integer(m.group(1)); - episode = new Integer(m.group(2)); - } - - // try to match airdate information - if ((m = airdatePattern.matcher(meta)).find()) { - airdate = Date.parse(m.group(), "MM/dd/yyyy"); // e.g. 5/20/2003 - } - - // add episode if SxE info has been found - if (season != null && episode != null) { - episodes.add(new Episode(searchResult.getName(), season, episode, title, null, null, airdate)); - } - } - - // episodes are listed in reverse order - Collections.reverse(episodes); - - return episodes; - } - - - @Override - public URI getEpisodeListLink(SearchResult searchResult) { - return getEpisodeListLink(searchResult, "All"); - } - - - @Override - public URI getEpisodeListLink(SearchResult searchResult, int season) { - return getEpisodeListLink(searchResult, Integer.toString(season)); - } - - - public URI getEpisodeListLink(SearchResult searchResult, String season) { - URL episodeGuide = ((HyperLink) searchResult).getURL(); - - return URI.create(episodeGuide + "?season=" + season); - } -} diff --git a/test/net/sourceforge/filebot/web/TVDotComClientTest.java b/test/net/sourceforge/filebot/web/TVDotComClientTest.java deleted file mode 100644 index 257abffa..00000000 --- a/test/net/sourceforge/filebot/web/TVDotComClientTest.java +++ /dev/null @@ -1,134 +0,0 @@ - -package net.sourceforge.filebot.web; - - -import static org.junit.Assert.*; - -import java.net.URL; -import java.util.List; - -import org.junit.BeforeClass; -import org.junit.Test; - - -public class TVDotComClientTest { - - private static TVDotComClient tvdotcom = new TVDotComClient(); - - private static HyperLink buffySearchResult; - - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - buffySearchResult = new HyperLink("Buffy the Vampire Slayer", new URL("http://www.tv.com/buffy-the-vampire-slayer/show/10/episode.html")); - } - - - @Test - public void search() throws Exception { - List results = tvdotcom.search("buffy the vampire slayer"); - - // if this fails, there is probably a problem with the xpath query - assertEquals(4, results.size()); - - HyperLink result = (HyperLink) results.get(0); - - assertEquals(buffySearchResult.getName(), result.getName()); - assertEquals(buffySearchResult.getURL().toString(), result.getURL().toString()); - } - - - @Test - public void searchNoMatch() throws Exception { - List results = tvdotcom.search("i will not find anything for this query string"); - - assertTrue(results.isEmpty()); - } - - - @Test - public void getEpisodeList() throws Exception { - List list = tvdotcom.getEpisodeList(buffySearchResult, 7); - - assertEquals(22, list.size()); - - Episode chosen = list.get(21); - - assertEquals("Buffy the Vampire Slayer", chosen.getSeriesName()); - assertEquals("Chosen", chosen.getTitle()); - assertEquals("22", chosen.getEpisode().toString()); - assertEquals("7", chosen.getSeason().toString()); - assertEquals("2003-05-20", chosen.airdate().toString()); - } - - - @Test - public void getEpisodeListAllMultiSeason() throws Exception { - // 144 episodes / 7 seasons - List list = tvdotcom.getEpisodeList(buffySearchResult); - - assertEquals(144, list.size()); - - Episode first = list.get(0); - - assertEquals("Buffy the Vampire Slayer", first.getSeriesName()); - assertEquals("Welcome to the Hellmouth (1)", first.getTitle()); - assertEquals("1", first.getEpisode().toString()); - assertEquals("1", first.getSeason().toString()); - assertEquals("1997-03-10", first.airdate().toString()); - } - - - @Test - public void getEpisodeListAllSingleSeason() throws Exception { - // 13 episodes / 1 season only - List list = tvdotcom.getEpisodeList(tvdotcom.search("Firefly").get(0)); - - assertEquals(15, list.size()); - - Episode fourth = list.get(3); - - assertEquals("Firefly", fourth.getSeriesName()); - assertEquals("Jaynestown", fourth.getTitle()); - assertEquals("4", fourth.getEpisode().toString()); - assertEquals("1", fourth.getSeason().toString()); - assertEquals("2002-10-18", fourth.airdate().toString()); - } - - - @Test - public void getEpisodeListAllManySeasons() throws Exception { - // more than 700 episodes / 26 seasons - List list = tvdotcom.getEpisodeList(tvdotcom.search("Doctor Who (1963)").get(0)); - - // there are still new episodes coming out - assertTrue(list.size() > 700); - } - - - @Test - public void getEpisodeListEncoding() throws Exception { - List list = tvdotcom.getEpisodeList(tvdotcom.search("Lost").get(0), 3); - - Episode episode = list.get(13); - - assertEquals("Lost", episode.getSeriesName()); - assertEquals("Exposé", episode.getTitle()); - assertEquals("14", episode.getEpisode().toString()); - assertEquals("3", episode.getSeason().toString()); - assertEquals("2007-03-28", episode.airdate().toString()); - } - - - @Test - public void getEpisodeListLink() { - assertEquals(tvdotcom.getEpisodeListLink(buffySearchResult, 1).toString(), "http://www.tv.com/buffy-the-vampire-slayer/show/10/episode.html?season=1"); - } - - - @Test - public void getEpisodeListLinkAll() { - assertEquals(tvdotcom.getEpisodeListLink(buffySearchResult, 0).toString(), "http://www.tv.com/buffy-the-vampire-slayer/show/10/episode.html?season=0"); - } - -} diff --git a/test/net/sourceforge/filebot/web/WebTestSuite.java b/test/net/sourceforge/filebot/web/WebTestSuite.java index fadb03b8..25550877 100644 --- a/test/net/sourceforge/filebot/web/WebTestSuite.java +++ b/test/net/sourceforge/filebot/web/WebTestSuite.java @@ -8,8 +8,8 @@ import org.junit.runners.Suite.SuiteClasses; @RunWith(Suite.class) -@SuiteClasses( { TVDotComClientTest.class, AnidbClientTest.class, TVRageClientTest.class, TheTVDBClientTest.class, SerienjunkiesClientTest.class, TMDbClientTest.class, IMDbClientTest.class, SubsceneSubtitleClientTest.class, - SublightSubtitleClientTest.class, OpenSubtitlesXmlRpcTest.class }) +@SuiteClasses( { AnidbClientTest.class, TVRageClientTest.class, TheTVDBClientTest.class, SerienjunkiesClientTest.class, TMDbClientTest.class, IMDbClientTest.class, SubsceneSubtitleClientTest.class, SublightSubtitleClientTest.class, + OpenSubtitlesXmlRpcTest.class }) public class WebTestSuite { }