+ dropped support for TV.com scraper (broken due to site changes)

parent 6f394dfadf
commit 6074680401

WebServices.java
@@ -12,7 +12,6 @@ import net.sourceforge.filebot.web.SerienjunkiesClient;
 import net.sourceforge.filebot.web.SublightSubtitleClient;
 import net.sourceforge.filebot.web.SubsceneSubtitleClient;
 import net.sourceforge.filebot.web.SubtitleProvider;
-import net.sourceforge.filebot.web.TVDotComClient;
 import net.sourceforge.filebot.web.TVRageClient;
 import net.sourceforge.filebot.web.TheTVDBClient;
 import net.sourceforge.filebot.web.VideoHashSubtitleService;
@@ -26,7 +25,6 @@ public final class WebServices {
 	// episode dbs
 	public static final TVRageClient TVRage = new TVRageClient();
 	public static final AnidbClient AniDB = new AnidbClient("filebot", 1);
-	public static final TVDotComClient TVDotCom = new TVDotComClient();
 	public static final IMDbClient IMDb = new IMDbClient();
 	public static final TheTVDBClient TheTVDB = new TheTVDBClient(getApplicationProperty("thetvdb.apikey"));
 	public static final SerienjunkiesClient Serienjunkies = new SerienjunkiesClient(getApplicationProperty("serienjunkies.apikey"));
@@ -38,7 +36,7 @@ public final class WebServices {
 
 
 	public static EpisodeListProvider[] getEpisodeListProviders() {
-		return new EpisodeListProvider[] { TVRage, AniDB, TVDotCom, IMDb, TheTVDB, Serienjunkies };
+		return new EpisodeListProvider[] { TVRage, AniDB, IMDb, TheTVDB, Serienjunkies };
 	}
 
 

Deleted: search.tvdotcom icon (binary image, 722 B; file not shown)

TVDotComClient.java (deleted)
@@ -1,200 +0,0 @@
package net.sourceforge.filebot.web;


import static net.sourceforge.filebot.web.WebRequest.*;
import static net.sourceforge.tuned.XPathUtilities.*;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.Icon;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import net.sourceforge.filebot.ResourceManager;


public class TVDotComClient extends AbstractEpisodeListProvider {

	private static final String host = "www.tv.com";


	@Override
	public String getName() {
		return "TV.com";
	}


	@Override
	public Icon getIcon() {
		return ResourceManager.getIcon("search.tvdotcom");
	}


	@Override
	public List<SearchResult> search(String query, Locale locale) throws IOException, SAXException {
		// use ajax search request, because we don't need the whole search result page
		URL searchUrl = new URL("http", host, "/search.php?type=Search&stype=ajax_search&search_type=program&qs=" + URLEncoder.encode(query, "UTF-8"));

		Document dom = getHtmlDocument(searchUrl);

		List<SearchResult> searchResults = new ArrayList<SearchResult>();

		for (Node node : selectNodes("//H2/A", dom)) {
			String title = getTextContent(node);
			String href = getAttribute("href", node);

			try {
				URL episodeGuideLocation = new URL(href.replaceAll("summary[.]html[?].*", "episode.html"));
				searchResults.add(new HyperLink(title, episodeGuideLocation));
			} catch (MalformedURLException e) {
				Logger.getLogger(getClass().getName()).log(Level.WARNING, "Invalid href: " + href, e);
			}
		}

		return searchResults;
	}


	@Override
	public List<Episode> getEpisodeList(final SearchResult searchResult, final Locale locale) throws Exception {
		// get document for season 1
		Document dom = getHtmlDocument(getEpisodeListLink(searchResult, 1).toURL());

		// seasons are ordered in reverse, first element is latest season
		String latestSeasonString = selectString("id('episode_list_header')//*[contains(@class, 'number')]", dom);

		if (latestSeasonString.isEmpty()) {
			// assume single season series
			latestSeasonString = "1";
		}

		// strip unexpected characters from season string (e.g. "7...")
		int seasonCount = Integer.valueOf(latestSeasonString.replaceAll("\\D+", ""));

		// we're going to fetch the episode list for each season on multiple threads
		List<Future<List<Episode>>> futures = new ArrayList<Future<List<Episode>>>(seasonCount);

		if (seasonCount > 1) {
			// max. 12 threads so we don't get too many concurrent connections
			ExecutorService executor = Executors.newFixedThreadPool(Math.min(seasonCount - 1, 12));

			// we already have the document for season 1, start with season 2
			for (int i = 2; i <= seasonCount; i++) {
				// season used in anonymous class
				final int season = i;

				futures.add(executor.submit(new Callable<List<Episode>>() {

					@Override
					public List<Episode> call() throws Exception {
						return getEpisodeList(searchResult, season, locale);
					}
				}));
			}

			// shutdown after all tasks are done
			executor.shutdown();
		}

		List<Episode> episodes = new ArrayList<Episode>(25 * seasonCount);

		// get episode list from season 1 document
		episodes.addAll(getEpisodeList(searchResult, dom));

		// get episodes from executor threads
		for (Future<List<Episode>> future : futures) {
			episodes.addAll(future.get());
		}

		return episodes;
	}


	@Override
	public List<Episode> getEpisodeList(SearchResult searchResult, int season, Locale locale) throws IOException, SAXException {
		Document dom = getHtmlDocument(getEpisodeListLink(searchResult, season).toURL());
		return getEpisodeList(searchResult, dom);
	}


	private List<Episode> getEpisodeList(SearchResult searchResult, Document dom) {

		List<Node> nodes = selectNodes("id('episode_guide_list')//*[@class='info']", dom);

		Pattern episodePattern = Pattern.compile("Season.(\\d+).+Episode.(\\d+)");
		Pattern airdatePattern = Pattern.compile("\\d{1,2}.\\d{1,2}.\\d{4}");

		List<Episode> episodes = new ArrayList<Episode>(nodes.size());

		for (Node node : nodes) {
			String title = selectString("./H3/A/text()", node);
			String meta = selectString("./*[@class='meta']", node).replaceAll("\\p{Space}+", " ");

			Integer season = null;
			Integer episode = null;
			Date airdate = null;

			Matcher m;

			// try to match episode information
			if ((m = episodePattern.matcher(meta)).find()) {
				// matches episode
				season = new Integer(m.group(1));
				episode = new Integer(m.group(2));
			}

			// try to match airdate information
			if ((m = airdatePattern.matcher(meta)).find()) {
				airdate = Date.parse(m.group(), "MM/dd/yyyy"); // e.g. 5/20/2003
			}

			// add episode if SxE info has been found
			if (season != null && episode != null) {
				episodes.add(new Episode(searchResult.getName(), season, episode, title, null, null, airdate));
			}
		}

		// episodes are listed in reverse order
		Collections.reverse(episodes);

		return episodes;
	}


	@Override
	public URI getEpisodeListLink(SearchResult searchResult) {
		return getEpisodeListLink(searchResult, "All");
	}


	@Override
	public URI getEpisodeListLink(SearchResult searchResult, int season) {
		return getEpisodeListLink(searchResult, Integer.toString(season));
	}


	public URI getEpisodeListLink(SearchResult searchResult, String season) {
		URL episodeGuide = ((HyperLink) searchResult).getURL();

		return URI.create(episodeGuide + "?season=" + season);
	}
}

TVDotComClientTest.java (deleted)
@@ -1,134 +0,0 @@
package net.sourceforge.filebot.web;


import static org.junit.Assert.*;

import java.net.URL;
import java.util.List;

import org.junit.BeforeClass;
import org.junit.Test;


public class TVDotComClientTest {

	private static TVDotComClient tvdotcom = new TVDotComClient();

	private static HyperLink buffySearchResult;


	@BeforeClass
	public static void setUpBeforeClass() throws Exception {
		buffySearchResult = new HyperLink("Buffy the Vampire Slayer", new URL("http://www.tv.com/buffy-the-vampire-slayer/show/10/episode.html"));
	}


	@Test
	public void search() throws Exception {
		List<SearchResult> results = tvdotcom.search("buffy the vampire slayer");

		// if this fails, there is probably a problem with the xpath query
		assertEquals(4, results.size());

		HyperLink result = (HyperLink) results.get(0);

		assertEquals(buffySearchResult.getName(), result.getName());
		assertEquals(buffySearchResult.getURL().toString(), result.getURL().toString());
	}


	@Test
	public void searchNoMatch() throws Exception {
		List<SearchResult> results = tvdotcom.search("i will not find anything for this query string");

		assertTrue(results.isEmpty());
	}


	@Test
	public void getEpisodeList() throws Exception {
		List<Episode> list = tvdotcom.getEpisodeList(buffySearchResult, 7);

		assertEquals(22, list.size());

		Episode chosen = list.get(21);

		assertEquals("Buffy the Vampire Slayer", chosen.getSeriesName());
		assertEquals("Chosen", chosen.getTitle());
		assertEquals("22", chosen.getEpisode().toString());
		assertEquals("7", chosen.getSeason().toString());
		assertEquals("2003-05-20", chosen.airdate().toString());
	}


	@Test
	public void getEpisodeListAllMultiSeason() throws Exception {
		// 144 episodes / 7 seasons
		List<Episode> list = tvdotcom.getEpisodeList(buffySearchResult);

		assertEquals(144, list.size());

		Episode first = list.get(0);

		assertEquals("Buffy the Vampire Slayer", first.getSeriesName());
		assertEquals("Welcome to the Hellmouth (1)", first.getTitle());
		assertEquals("1", first.getEpisode().toString());
		assertEquals("1", first.getSeason().toString());
		assertEquals("1997-03-10", first.airdate().toString());
	}


	@Test
	public void getEpisodeListAllSingleSeason() throws Exception {
		// 13 episodes / 1 season only
		List<Episode> list = tvdotcom.getEpisodeList(tvdotcom.search("Firefly").get(0));

		assertEquals(15, list.size());

		Episode fourth = list.get(3);

		assertEquals("Firefly", fourth.getSeriesName());
		assertEquals("Jaynestown", fourth.getTitle());
		assertEquals("4", fourth.getEpisode().toString());
		assertEquals("1", fourth.getSeason().toString());
		assertEquals("2002-10-18", fourth.airdate().toString());
	}


	@Test
	public void getEpisodeListAllManySeasons() throws Exception {
		// more than 700 episodes / 26 seasons
		List<Episode> list = tvdotcom.getEpisodeList(tvdotcom.search("Doctor Who (1963)").get(0));

		// there are still new episodes coming out
		assertTrue(list.size() > 700);
	}


	@Test
	public void getEpisodeListEncoding() throws Exception {
		List<Episode> list = tvdotcom.getEpisodeList(tvdotcom.search("Lost").get(0), 3);

		Episode episode = list.get(13);

		assertEquals("Lost", episode.getSeriesName());
		assertEquals("Exposé", episode.getTitle());
		assertEquals("14", episode.getEpisode().toString());
		assertEquals("3", episode.getSeason().toString());
		assertEquals("2007-03-28", episode.airdate().toString());
	}


	@Test
	public void getEpisodeListLink() {
		assertEquals(tvdotcom.getEpisodeListLink(buffySearchResult, 1).toString(), "http://www.tv.com/buffy-the-vampire-slayer/show/10/episode.html?season=1");
	}


	@Test
	public void getEpisodeListLinkAll() {
		assertEquals(tvdotcom.getEpisodeListLink(buffySearchResult, 0).toString(), "http://www.tv.com/buffy-the-vampire-slayer/show/10/episode.html?season=0");
	}

}

WebTestSuite.java
@@ -8,8 +8,8 @@ import org.junit.runners.Suite.SuiteClasses;
 
 
 @RunWith(Suite.class)
-@SuiteClasses( { TVDotComClientTest.class, AnidbClientTest.class, TVRageClientTest.class, TheTVDBClientTest.class, SerienjunkiesClientTest.class, TMDbClientTest.class, IMDbClientTest.class, SubsceneSubtitleClientTest.class,
-		SublightSubtitleClientTest.class, OpenSubtitlesXmlRpcTest.class })
+@SuiteClasses( { AnidbClientTest.class, TVRageClientTest.class, TheTVDBClientTest.class, SerienjunkiesClientTest.class, TMDbClientTest.class, IMDbClientTest.class, SubsceneSubtitleClientTest.class, SublightSubtitleClientTest.class,
+		OpenSubtitlesXmlRpcTest.class })
 public class WebTestSuite {
 
 }