* heavily improved Subscene support (up to 35x faster)
This commit is contained in:
parent
a94cedd601
commit
9eb74e8038
|
@ -6,7 +6,6 @@ import java.io.IOException;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.text.NumberFormat;
|
||||
|
@ -60,10 +59,8 @@ public class AnidbClient extends EpisodeListClient {
|
|||
String path = "/perl-bin/" + href;
|
||||
|
||||
try {
|
||||
URI animeUrl = new URI("http", host, path, null);
|
||||
|
||||
searchResults.add(new HyperLink(title, animeUrl));
|
||||
} catch (URISyntaxException e) {
|
||||
searchResults.add(new HyperLink(title, new URL("http", host, path)));
|
||||
} catch (MalformedURLException e) {
|
||||
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid href: " + href);
|
||||
}
|
||||
}
|
||||
|
@ -76,7 +73,7 @@ public class AnidbClient extends EpisodeListClient {
|
|||
String header = XPathUtil.selectString("id('layout-content')//H1[1]", dom);
|
||||
String title = header.replaceFirst("Anime:\\s*", "");
|
||||
|
||||
searchResults.add(new HyperLink(title, URI.create(getSearchUrl(searchterm).toString())));
|
||||
searchResults.add(new HyperLink(title, getSearchUrl(searchterm)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -123,7 +120,7 @@ public class AnidbClient extends EpisodeListClient {
|
|||
|
||||
@Override
|
||||
public URI getEpisodeListLink(SearchResult searchResult) {
|
||||
return ((HyperLink) searchResult).getURI();
|
||||
return ((HyperLink) searchResult).toURI();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ import java.net.URI;
|
|||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
|
@ -56,6 +57,17 @@ public class HtmlUtil {
|
|||
}
|
||||
|
||||
|
||||
public static Document getHtmlDocument(URL url, Map<String, String> requestHeaders) throws IOException, SAXException {
|
||||
URLConnection connection = url.openConnection();
|
||||
|
||||
for (String key : requestHeaders.keySet()) {
|
||||
connection.addRequestProperty(key, requestHeaders.get(key));
|
||||
}
|
||||
|
||||
return getHtmlDocument(connection);
|
||||
}
|
||||
|
||||
|
||||
public static Document getHtmlDocument(URLConnection connection) throws IOException, SAXException {
|
||||
Charset charset = getCharset(connection.getContentType());
|
||||
String encoding = connection.getContentEncoding();
|
||||
|
|
|
@ -4,26 +4,31 @@ package net.sourceforge.filebot.web;
|
|||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
|
||||
|
||||
public class HyperLink extends SearchResult {
|
||||
|
||||
private final URI uri;
|
||||
private final URL url;
|
||||
|
||||
|
||||
public HyperLink(String name, URI uri) {
|
||||
public HyperLink(String name, URL url) {
|
||||
super(name);
|
||||
this.uri = uri;
|
||||
this.url = url;
|
||||
}
|
||||
|
||||
|
||||
public HyperLink(String name, String uri) throws URISyntaxException {
|
||||
this(name, new URI(uri));
|
||||
public URL getURL() {
|
||||
return url;
|
||||
}
|
||||
|
||||
|
||||
public URI getURI() {
|
||||
return uri;
|
||||
public URI toURI() {
|
||||
try {
|
||||
return url.toURI();
|
||||
} catch (URISyntaxException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -6,15 +6,16 @@ import java.io.IOException;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
|
@ -33,6 +34,8 @@ public class SubsceneSubtitleClient extends SubtitleClient {
|
|||
|
||||
private final SearchResultCache cache = new SearchResultCache();
|
||||
|
||||
private final Map<String, Integer> languageFilterMap = new ConcurrentHashMap<String, Integer>(50);
|
||||
|
||||
private final String host = "subscene.com";
|
||||
|
||||
|
||||
|
@ -56,13 +59,14 @@ public class SubsceneSubtitleClient extends SubtitleClient {
|
|||
for (Node node : nodes) {
|
||||
String title = XPathUtil.selectString("text()", node);
|
||||
String href = XPathUtil.selectString("@href", node);
|
||||
String count = XPathUtil.selectString("./DFN", node).replaceAll("\\D+", "");
|
||||
|
||||
try {
|
||||
//TODO which exception?
|
||||
URI url = new URI("http", host, href);
|
||||
URL subtitleListUrl = new URL("http", host, href);
|
||||
int subtitleCount = Integer.parseInt(count);
|
||||
|
||||
searchResults.add(new HyperLink(title, url));
|
||||
} catch (URISyntaxException e) {
|
||||
searchResults.add(new SubsceneSearchResult(title, subtitleListUrl, subtitleCount));
|
||||
} catch (MalformedURLException e) {
|
||||
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid href: " + href, e);
|
||||
}
|
||||
}
|
||||
|
@ -72,85 +76,127 @@ public class SubsceneSubtitleClient extends SubtitleClient {
|
|||
return searchResults;
|
||||
}
|
||||
|
||||
HashMap<String, String> languageIdCache;
|
||||
|
||||
private void updateLanguageFilterMap(Document subtitleListDocument) {
|
||||
|
||||
List<Node> nodes = XPathUtil.selectNodes("//DIV[@class='languageList']/DIV", subtitleListDocument);
|
||||
|
||||
for (Node node : nodes) {
|
||||
String onClick = XPathUtil.selectString("./INPUT/@onclick", node);
|
||||
|
||||
String filter = new Scanner(onClick).findInLine("\\d+");
|
||||
|
||||
if (filter != null) {
|
||||
String name = XPathUtil.selectString("./LABEL/text()", node);
|
||||
|
||||
languageFilterMap.put(name.toLowerCase(), Integer.valueOf(filter));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Integer getLanguageFilter(String languageName) {
|
||||
if (languageName == null)
|
||||
return null;
|
||||
|
||||
return languageFilterMap.get(languageName.toLowerCase());
|
||||
}
|
||||
|
||||
public String getLanguageID(Locale language) {
|
||||
return languageIdCache.get(language.getDisplayLanguage(Locale.ENGLISH).toLowerCase());
|
||||
|
||||
private String getLanguageName(Locale language) {
|
||||
if (language == null || language == Locale.ROOT)
|
||||
return null;
|
||||
|
||||
return language.getDisplayLanguage(Locale.ENGLISH);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<SubtitleDescriptor> getSubtitleList(SearchResult searchResult, Locale language) throws Exception {
|
||||
|
||||
URL url = getSubtitleListLink(searchResult).toURL();
|
||||
URL subtitleListUrl = getSubtitleListLink(searchResult).toURL();
|
||||
String languageName = getLanguageName(language);
|
||||
Integer languageFilter = getLanguageFilter(languageName);
|
||||
|
||||
Document dom = null;
|
||||
boolean reloadFilteredDocument = (languageFilter == null && useFilteredDocument(searchResult));
|
||||
boolean forceReload = false;
|
||||
|
||||
if (languageIdCache != null) {
|
||||
URLConnection connection = url.openConnection();
|
||||
if (reloadFilteredDocument && languageFilterMap.isEmpty()) {
|
||||
// we don't know the filter values yet, so we request a document with an invalid filter,
|
||||
// that will return a subtitle document very fast
|
||||
languageFilter = -1;
|
||||
forceReload = true;
|
||||
}
|
||||
|
||||
Document subtitleListDocument = getSubtitleListDocument(subtitleListUrl, languageFilter);
|
||||
|
||||
if (languageFilterMap.isEmpty()) {
|
||||
updateLanguageFilterMap(subtitleListDocument);
|
||||
}
|
||||
|
||||
// check if document is already filtered and if requesting a filtered document
|
||||
// will result in a performance gain (Note: XPath can be very slow)
|
||||
if (reloadFilteredDocument) {
|
||||
languageFilter = getLanguageFilter(languageName);
|
||||
|
||||
if (language != null && language != Locale.ROOT) {
|
||||
System.out.println(getLanguageID(language));
|
||||
connection.addRequestProperty("Cookie", "subscene_sLanguageIds=" + getLanguageID(language));
|
||||
}
|
||||
|
||||
dom = HtmlUtil.getHtmlDocument(connection);
|
||||
} else {
|
||||
URLConnection connection = url.openConnection();
|
||||
|
||||
dom = HtmlUtil.getHtmlDocument(connection);
|
||||
|
||||
List<Node> nodes = XPathUtil.selectNodes("//DIV[@class='languageList']/DIV", dom);
|
||||
|
||||
Pattern onClickPattern = Pattern.compile("selectLanguage\\((\\d+)\\);");
|
||||
|
||||
languageIdCache = new HashMap<String, String>();
|
||||
|
||||
for (Node node : nodes) {
|
||||
Matcher matcher = onClickPattern.matcher(XPathUtil.selectString("./INPUT/@onclick", node));
|
||||
|
||||
if (matcher.matches()) {
|
||||
String name = XPathUtil.selectString("./LABEL/text()", node);
|
||||
String id = matcher.group(1);
|
||||
|
||||
//TODO sysout
|
||||
System.out.println(name + " = " + id);
|
||||
|
||||
languageIdCache.put(name.toLowerCase(), id);
|
||||
}
|
||||
// if language filter has become available, request a filtered document, or if first request was a dummy request
|
||||
if (languageFilter != null || forceReload) {
|
||||
subtitleListDocument = getSubtitleListDocument(subtitleListUrl, languageFilter);
|
||||
}
|
||||
}
|
||||
|
||||
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@class='filmSubtitleList']//A[@id]//ancestor::TR", dom);
|
||||
return getSubtitleList(subtitleListUrl, languageName, subtitleListDocument);
|
||||
}
|
||||
|
||||
|
||||
private boolean useFilteredDocument(SearchResult searchResult) {
|
||||
SubsceneSearchResult sr = (SubsceneSearchResult) searchResult;
|
||||
return sr.getSubtitleCount() > 100;
|
||||
}
|
||||
|
||||
|
||||
private Document getSubtitleListDocument(URL subtitleListUrl, Integer languageFilter) throws IOException, SAXException {
|
||||
Map<String, String> requestHeaders = new HashMap<String, String>(1);
|
||||
|
||||
Pattern hrefPattern = Pattern.compile("javascript:Subtitle\\((\\d+), '(\\w+)', '\\d+', '(\\d+)'\\);");
|
||||
if (languageFilter != null) {
|
||||
requestHeaders.put("Cookie", "subscene_sLanguageIds=" + languageFilter);
|
||||
}
|
||||
|
||||
ArrayList<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>(nodes.size());
|
||||
return HtmlUtil.getHtmlDocument(subtitleListUrl, requestHeaders);
|
||||
}
|
||||
|
||||
|
||||
private List<SubtitleDescriptor> getSubtitleList(URL subtitleListUrl, String languageName, Document subtitleListDocument) {
|
||||
|
||||
List<Node> nodes = XPathUtil.selectNodes("//TABLE[@class='filmSubtitleList']//A[@id]//ancestor::TR", subtitleListDocument);
|
||||
|
||||
Pattern hrefPattern = Pattern.compile("javascript:Subtitle\\((\\d+), '(\\w+)', .*");
|
||||
|
||||
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>(nodes.size());
|
||||
|
||||
for (Node node : nodes) {
|
||||
try {
|
||||
Node linkNode = XPathUtil.selectFirstNode("./TD[1]/A", node);
|
||||
|
||||
String lang = XPathUtil.selectString("./SPAN[1]", linkNode);
|
||||
|
||||
String href = XPathUtil.selectString("@href", linkNode);
|
||||
|
||||
String name = XPathUtil.selectString("./SPAN[2]", linkNode);
|
||||
|
||||
String author = XPathUtil.selectString("./TD[4]", node);
|
||||
|
||||
Matcher matcher = hrefPattern.matcher(href);
|
||||
|
||||
if (!matcher.matches())
|
||||
throw new IllegalArgumentException("Cannot extract download parameters: " + href);
|
||||
|
||||
String subtitleId = matcher.group(1);
|
||||
String typeId = matcher.group(2);
|
||||
|
||||
URL downloadUrl = getDownloadUrl(url, subtitleId, typeId);
|
||||
|
||||
subtitles.add(new SubsceneSubtitleDescriptor(name, lang, author, typeId, downloadUrl, url));
|
||||
if (languageName == null || languageName.equalsIgnoreCase(lang)) {
|
||||
|
||||
String href = XPathUtil.selectString("@href", linkNode);
|
||||
String name = XPathUtil.selectString("./SPAN[2]", linkNode);
|
||||
String author = XPathUtil.selectString("./TD[4]", node);
|
||||
|
||||
Matcher matcher = hrefPattern.matcher(href);
|
||||
|
||||
if (!matcher.matches())
|
||||
throw new IllegalArgumentException("Cannot extract download parameters: " + href);
|
||||
|
||||
String subtitleId = matcher.group(1);
|
||||
String typeId = matcher.group(2);
|
||||
|
||||
URL downloadUrl = getDownloadUrl(subtitleListUrl, subtitleId, typeId);
|
||||
|
||||
subtitles.add(new SubsceneSubtitleDescriptor(name, lang, author, typeId, downloadUrl, subtitleListUrl));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Cannot parse subtitle node", e);
|
||||
}
|
||||
|
@ -170,7 +216,7 @@ public class SubsceneSubtitleClient extends SubtitleClient {
|
|||
|
||||
@Override
|
||||
public URI getSubtitleListLink(SearchResult searchResult) {
|
||||
return ((HyperLink) searchResult).getURI();
|
||||
return ((HyperLink) searchResult).toURI();
|
||||
}
|
||||
|
||||
|
||||
|
@ -180,4 +226,22 @@ public class SubsceneSubtitleClient extends SubtitleClient {
|
|||
return new URL("http", host, file);
|
||||
}
|
||||
|
||||
|
||||
protected static class SubsceneSearchResult extends HyperLink {
|
||||
|
||||
private final int subtitleCount;
|
||||
|
||||
|
||||
public SubsceneSearchResult(String name, URL url, int subtitleCount) {
|
||||
super(name, url);
|
||||
this.subtitleCount = subtitleCount;
|
||||
}
|
||||
|
||||
|
||||
public int getSubtitleCount() {
|
||||
return subtitleCount;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ import java.io.IOException;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.text.NumberFormat;
|
||||
|
@ -65,10 +64,10 @@ public class TVDotComClient extends EpisodeListClient {
|
|||
String href = XPathUtil.selectString("@href", node);
|
||||
|
||||
try {
|
||||
String episodeListingUrl = href.replaceFirst(Pattern.quote("summary.html?") + ".*", "episode_listings.html");
|
||||
URL episodeListingUrl = new URL(href.replaceFirst(Pattern.quote("summary.html?") + ".*", "episode_listings.html"));
|
||||
|
||||
searchResults.add(new HyperLink(title, episodeListingUrl));
|
||||
} catch (URISyntaxException e) {
|
||||
} catch (MalformedURLException e) {
|
||||
Logger.getLogger(Logger.GLOBAL_LOGGER_NAME).log(Level.WARNING, "Invalid href: " + href, e);
|
||||
}
|
||||
}
|
||||
|
@ -169,7 +168,7 @@ public class TVDotComClient extends EpisodeListClient {
|
|||
|
||||
@Override
|
||||
public URI getEpisodeListLink(SearchResult searchResult, int season) {
|
||||
String episodeListingUrl = ((HyperLink) searchResult).getURI().toString();
|
||||
URL episodeListingUrl = ((HyperLink) searchResult).getURL();
|
||||
|
||||
return URI.create(episodeListingUrl + "?season=" + season);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
|
||||
package net.sourceforge.filebot.web;
|
||||
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import net.sourceforge.filebot.ui.panel.subtitle.LanguageResolver;
|
||||
import net.sourceforge.filebot.web.SubsceneSubtitleClient.SubsceneSearchResult;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class SubsceneSubtitleClientTest {
|
||||
|
||||
private static SubsceneSearchResult testResult;
|
||||
private static SubsceneSearchResult manySubtitlesTestResult;
|
||||
|
||||
private SubsceneSubtitleClient client = new SubsceneSubtitleClient();
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
testResult = new SubsceneSearchResult("Twin Peaks - First Season (1990)", new URL("http://subscene.com/twin-peaks--first-season/subtitles-32482.aspx"), 17);
|
||||
manySubtitlesTestResult = new SubsceneSearchResult("Lost - Fourth Season (2008)", new URL("http://subscene.com/Lost-Fourth-Season/subtitles-70963.aspx"), 420);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void search() throws Exception {
|
||||
List<SearchResult> results = client.search("twin peaks");
|
||||
|
||||
SubsceneSearchResult result = (SubsceneSearchResult) results.get(1);
|
||||
|
||||
assertEquals(testResult.getName(), result.getName());
|
||||
assertEquals(testResult.getURL().toString(), result.getURL().toString());
|
||||
assertEquals(testResult.getSubtitleCount(), result.getSubtitleCount());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void getSubtitleListSearchResult() throws Exception {
|
||||
List<SubtitleDescriptor> subtitleList = client.getSubtitleList(testResult, Locale.ITALIAN);
|
||||
|
||||
assertEquals(1, subtitleList.size());
|
||||
|
||||
SubtitleDescriptor subtitle = subtitleList.get(0);
|
||||
|
||||
assertEquals("Twin Peaks - First Season", subtitle.getName());
|
||||
assertEquals("Italian", subtitle.getLanguageName());
|
||||
assertEquals("zip", subtitle.getArchiveType());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void getSubtitleListSearchResultMany() throws Exception {
|
||||
List<SubtitleDescriptor> subtitleList = client.getSubtitleList(manySubtitlesTestResult, LanguageResolver.getDefault().getLocale("Vietnamese"));
|
||||
|
||||
assertEquals(1, subtitleList.size());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void getSubtitleListLink() throws Exception {
|
||||
assertEquals(testResult.getURL().toString(), client.getSubtitleListLink(testResult).toURL().toString());
|
||||
}
|
||||
|
||||
}
|
|
@ -4,21 +4,30 @@ package net.sourceforge.filebot.web;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class TVDotComClientTest {
|
||||
|
||||
private static TVDotComClient tvdotcom = new TVDotComClient();
|
||||
private static HyperLink testResult;
|
||||
private static HyperLink singleSeasonTestResult;
|
||||
private static HyperLink manySeasonsTestResult;
|
||||
|
||||
private static HyperLink testResult = new HyperLink("Buffy the Vampire Slayer", URI.create("http://www.tv.com/buffy-the-vampire-slayer/show/10/episode_listings.html"));
|
||||
private static HyperLink singleSeasonTestResult = new HyperLink("Firefly", URI.create("http://www.tv.com/firefly/show/7097/episode_listings.html"));
|
||||
private static HyperLink manySeasonsTestResult = new HyperLink("Doctor Who", URI.create("http://www.tv.com/doctor-who/show/355/episode_listings.html"));
|
||||
private TVDotComClient tvdotcom = new TVDotComClient();
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
testResult = new HyperLink("Buffy the Vampire Slayer", new URL("http://www.tv.com/buffy-the-vampire-slayer/show/10/episode_listings.html"));
|
||||
singleSeasonTestResult = new HyperLink("Firefly", new URL("http://www.tv.com/firefly/show/7097/episode_listings.html"));
|
||||
manySeasonsTestResult = new HyperLink("Doctor Who", new URL("http://www.tv.com/doctor-who/show/355/episode_listings.html"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void search() throws Exception {
|
||||
List<SearchResult> results = tvdotcom.search("Buffy");
|
||||
|
@ -26,7 +35,7 @@ public class TVDotComClientTest {
|
|||
HyperLink result = (HyperLink) results.get(0);
|
||||
|
||||
assertEquals(testResult.getName(), result.getName());
|
||||
assertEquals(testResult.getURI(), result.getURI());
|
||||
assertEquals(testResult.getURL().toString(), result.getURL().toString());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -13,9 +13,10 @@ import org.junit.Test;
|
|||
|
||||
public class TVRageClientTest {
|
||||
|
||||
private static TVRageClient tvrage = new TVRageClient();
|
||||
private static TVRageSearchResult testResult = new TVRageSearchResult("Buffy the Vampire Slayer", 2930, "http://www.tvrage.com/Buffy_The_Vampire_Slayer");
|
||||
|
||||
private TVRageClient tvrage = new TVRageClient();
|
||||
|
||||
|
||||
@Test
|
||||
public void search() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue