* fixed/updated Subscene scraper
* fixed/updated Sublight webservice
* updated test cases related to various web resources
This commit is contained in:
parent
531b455da2
commit
d3bdcf597e
Binary file not shown.
|
@ -73,7 +73,7 @@ public class IMDbClient implements EpisodeListProvider {
|
|||
if (results.isEmpty()) {
|
||||
try {
|
||||
String name = normalizeName(selectString("//H1/text()", dom));
|
||||
String year = selectString("//H1//A", dom);
|
||||
String year = new Scanner(selectString("//H1//SPAN", dom)).useDelimiter("\\D+").next();
|
||||
String url = selectString("//LINK[@rel='canonical']/@href", dom);
|
||||
|
||||
results.add(new MovieDescriptor(name, Integer.parseInt(year), getImdbId(url)));
|
||||
|
|
|
@ -29,10 +29,10 @@ import net.sublight.webservice.ClientInfo;
|
|||
import net.sublight.webservice.Genre;
|
||||
import net.sublight.webservice.IMDB;
|
||||
import net.sublight.webservice.Release;
|
||||
import net.sublight.webservice.Sublight;
|
||||
import net.sublight.webservice.SublightSoap;
|
||||
import net.sublight.webservice.Subtitle;
|
||||
import net.sublight.webservice.SubtitleLanguage;
|
||||
import net.sublight.webservice.SubtitlesAPI2;
|
||||
import net.sublight.webservice.SubtitlesAPI2Soap;
|
||||
|
||||
|
||||
public class SublightSubtitleClient implements SubtitleProvider, VideoHashSubtitleService {
|
||||
|
@ -41,7 +41,7 @@ public class SublightSubtitleClient implements SubtitleProvider, VideoHashSubtit
|
|||
|
||||
private final ClientInfo clientInfo = new ClientInfo();
|
||||
|
||||
private SubtitlesAPI2Soap webservice;
|
||||
private SublightSoap webservice;
|
||||
|
||||
private String session;
|
||||
|
||||
|
@ -273,19 +273,23 @@ public class SublightSubtitleClient implements SubtitleProvider, VideoHashSubtit
|
|||
}
|
||||
|
||||
|
||||
protected byte[] getZipArchive(Subtitle subtitle) throws WebServiceException {
|
||||
protected byte[] getZipArchive(Subtitle subtitle) throws WebServiceException, InterruptedException {
|
||||
// require login
|
||||
login();
|
||||
|
||||
Holder<String> ticket = new Holder<String>();
|
||||
Holder<Short> que = new Holder<Short>();
|
||||
Holder<byte[]> data = new Holder<byte[]>();
|
||||
Holder<String> error = new Holder<String>();
|
||||
|
||||
webservice.getDownloadTicket(session, null, subtitle.getSubtitleID(), null, ticket, null, error);
|
||||
webservice.getDownloadTicket2(session, null, subtitle.getSubtitleID(), null, ticket, que, null, error);
|
||||
|
||||
// abort if something went wrong
|
||||
checkError(error);
|
||||
|
||||
// wait x seconds as specified by the download ticket response, download ticket is not valid until then
|
||||
Thread.sleep(que.value * 1000);
|
||||
|
||||
webservice.downloadByID4(session, subtitle.getSubtitleID(), -1, false, ticket.value, null, data, null, error);
|
||||
|
||||
// abort if something went wrong
|
||||
|
@ -306,7 +310,7 @@ public class SublightSubtitleClient implements SubtitleProvider, VideoHashSubtit
|
|||
protected synchronized void login() throws WebServiceException {
|
||||
if (webservice == null) {
|
||||
// lazy initialize because all the JAX-WS class loading can take quite some time
|
||||
webservice = new SubtitlesAPI2().getSubtitlesAPI2Soap();
|
||||
webservice = new Sublight().getSublightSoap();
|
||||
}
|
||||
|
||||
if (session == null) {
|
||||
|
|
|
@ -17,8 +17,6 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.swing.Icon;
|
||||
|
||||
|
@ -28,7 +26,6 @@ import org.xml.sax.SAXException;
|
|||
|
||||
import net.sourceforge.filebot.ResourceManager;
|
||||
import net.sourceforge.filebot.Settings;
|
||||
import net.sourceforge.tuned.FileUtilities;
|
||||
|
||||
|
||||
public class SubsceneSubtitleClient implements SubtitleProvider {
|
||||
|
@ -128,9 +125,6 @@ public class SubsceneSubtitleClient implements SubtitleProvider {
|
|||
|
||||
List<Node> nodes = selectNodes("//TABLE[@class='filmSubtitleList']//A[@class='a1']", subtitleListDocument);
|
||||
|
||||
// match subtitleId and typeId
|
||||
Pattern hrefPattern = Pattern.compile("javascript:Subtitle\\((\\d+), '(\\w+)', .*");
|
||||
|
||||
List<SubtitleDescriptor> subtitles = new ArrayList<SubtitleDescriptor>(nodes.size());
|
||||
|
||||
for (Node node : nodes) {
|
||||
|
@ -140,18 +134,9 @@ public class SubsceneSubtitleClient implements SubtitleProvider {
|
|||
if (languageName == null || languageName.equalsIgnoreCase(lang)) {
|
||||
String name = getTextContent(getChildren("SPAN", node).get(1));
|
||||
String href = getAttribute("href", node);
|
||||
URL subtitlePage = new URL(subtitleListUrl.getProtocol(), subtitleListUrl.getHost(), href);
|
||||
|
||||
Matcher matcher = hrefPattern.matcher(href);
|
||||
|
||||
if (!matcher.matches())
|
||||
throw new IllegalArgumentException("Cannot parse download parameters: " + href);
|
||||
|
||||
String subtitleId = matcher.group(1);
|
||||
String archiveType = matcher.group(2);
|
||||
|
||||
URL downloadUrl = getDownloadLink(subtitleListUrl, subtitleId, archiveType);
|
||||
|
||||
subtitles.add(new SubsceneSubtitleDescriptor(name, lang, archiveType, downloadUrl, subtitleListUrl));
|
||||
subtitles.add(new SubsceneSubtitleDescriptor(name, lang, subtitlePage));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger(getClass().getName()).log(Level.WARNING, "Cannot parse subtitle node", e);
|
||||
|
@ -199,14 +184,6 @@ public class SubsceneSubtitleClient implements SubtitleProvider {
|
|||
}
|
||||
|
||||
|
||||
protected URL getDownloadLink(URL referer, String subtitleId, String typeId) throws MalformedURLException {
|
||||
String basePath = FileUtilities.getNameWithoutExtension(referer.getFile());
|
||||
String path = String.format("%s-dlpath-%s/%s.zipx", basePath, subtitleId, typeId);
|
||||
|
||||
return new URL(referer.getProtocol(), referer.getHost(), path);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public URI getSubtitleListLink(SearchResult searchResult, String languageName) {
|
||||
return ((HyperLink) searchResult).getURI();
|
||||
|
|
|
@ -2,31 +2,33 @@
|
|||
package net.sourceforge.filebot.web;
|
||||
|
||||
|
||||
import static java.util.Collections.*;
|
||||
import static net.sourceforge.filebot.web.WebRequest.*;
|
||||
import static net.sourceforge.tuned.XPathUtilities.*;
|
||||
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
|
||||
import net.sourceforge.tuned.FileUtilities;
|
||||
|
||||
|
||||
public class SubsceneSubtitleDescriptor implements SubtitleDescriptor {
|
||||
|
||||
private final String title;
|
||||
private final String language;
|
||||
private String title;
|
||||
private String language;
|
||||
|
||||
private final String archiveType;
|
||||
|
||||
private final URL downloadLink;
|
||||
private final URL referer;
|
||||
private URL subtitlePage;
|
||||
private Map<String, String> subtitleInfo;
|
||||
|
||||
|
||||
public SubsceneSubtitleDescriptor(String title, String language, String archiveType, URL downloadLink, URL referer) {
|
||||
public SubsceneSubtitleDescriptor(String title, String language, URL subtitlePage) {
|
||||
this.title = title;
|
||||
this.language = language;
|
||||
|
||||
this.archiveType = archiveType;
|
||||
|
||||
this.downloadLink = downloadLink;
|
||||
this.referer = referer;
|
||||
this.subtitlePage = subtitlePage;
|
||||
}
|
||||
|
||||
|
||||
|
@ -44,13 +46,42 @@ public class SubsceneSubtitleDescriptor implements SubtitleDescriptor {
|
|||
|
||||
@Override
|
||||
public String getType() {
|
||||
return archiveType;
|
||||
return getSubtitleInfo().get("typeId");
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ByteBuffer fetch() throws Exception {
|
||||
return WebRequest.fetch(downloadLink, singletonMap("Referer", referer.toString()));
|
||||
// e.g. http://subscene.com/english/Firefly-The-Complete-Series/subtitle-40003-dlpath-20008/rar.zipx
|
||||
String subtitlePagePath = FileUtilities.getNameWithoutExtension(subtitlePage.getFile());
|
||||
String path = String.format("%s-dlpath-%s/%s.zipx", subtitlePagePath, getSubtitleInfo().get("filmId"), getSubtitleInfo().get("typeId"));
|
||||
|
||||
URL downloadLocator = new URL(subtitlePage.getProtocol(), subtitlePage.getHost(), path);
|
||||
Map<String, String> downloadPostData = subtitleInfo;
|
||||
|
||||
HttpURLConnection connection = (HttpURLConnection) downloadLocator.openConnection();
|
||||
connection.addRequestProperty("Referer", subtitlePage.toString());
|
||||
|
||||
return WebRequest.post(connection, downloadPostData);
|
||||
}
|
||||
|
||||
|
||||
private synchronized Map<String, String> getSubtitleInfo() {
|
||||
// extract subtitle information from subtitle page if necessary
|
||||
if (subtitleInfo == null) {
|
||||
try {
|
||||
Document dom = getHtmlDocument(subtitlePage);
|
||||
|
||||
subtitleInfo = new HashMap<String, String>();
|
||||
subtitleInfo.put("subtitleId", selectString("//INPUT[@name='subtitleId']/@value", dom));
|
||||
subtitleInfo.put("typeId", selectString("//INPUT[@name='typeId']/@value", dom));
|
||||
subtitleInfo.put("filmId", selectString("//INPUT[@name='filmId']/@value", dom));
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Failed to extract subtitle info", e);
|
||||
}
|
||||
}
|
||||
|
||||
return subtitleInfo;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -5,9 +5,13 @@ package net.sourceforge.filebot.web;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStream;
|
||||
import java.io.Reader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Map;
|
||||
|
@ -131,6 +135,44 @@ public final class WebRequest {
|
|||
}
|
||||
|
||||
|
||||
public static ByteBuffer post(HttpURLConnection connection, Map<String, String> parameters) throws IOException {
|
||||
byte[] postData = encodeParameters(parameters).getBytes("UTF-8");
|
||||
|
||||
// add content type and content length headers
|
||||
connection.addRequestProperty("Content-Type", "application/x-www-form-urlencoded");
|
||||
connection.addRequestProperty("Content-Length", String.valueOf(postData.length));
|
||||
|
||||
connection.setRequestMethod("POST");
|
||||
connection.setDoOutput(true);
|
||||
|
||||
// write post data
|
||||
OutputStream out = connection.getOutputStream();
|
||||
out.write(postData);
|
||||
out.close();
|
||||
|
||||
// read response
|
||||
int contentLength = connection.getContentLength();
|
||||
|
||||
InputStream in = connection.getInputStream();
|
||||
ByteBufferOutputStream buffer = new ByteBufferOutputStream(contentLength >= 0 ? contentLength : 32 * 1024);
|
||||
|
||||
try {
|
||||
// read all
|
||||
buffer.transferFully(in);
|
||||
} catch (IOException e) {
|
||||
// if the content length is not known in advance an IOException (Premature EOF)
|
||||
// is always thrown after all the data has been read
|
||||
if (contentLength >= 0) {
|
||||
throw e;
|
||||
}
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
|
||||
return buffer.getByteBuffer();
|
||||
}
|
||||
|
||||
|
||||
private static Charset getCharset(String contentType) {
|
||||
if (contentType != null) {
|
||||
// e.g. Content-Type: text/html; charset=iso-8859-1
|
||||
|
@ -155,6 +197,28 @@ public final class WebRequest {
|
|||
}
|
||||
|
||||
|
||||
public static String encodeParameters(Map<String, String> parameters) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (Entry<String, String> entry : parameters.entrySet()) {
|
||||
if (sb.length() > 0)
|
||||
sb.append("&");
|
||||
|
||||
sb.append(entry.getKey());
|
||||
sb.append("=");
|
||||
|
||||
try {
|
||||
sb.append(URLEncoder.encode(entry.getValue(), "UTF-8"));
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// will never happen
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Dummy constructor to prevent instantiation.
|
||||
*/
|
||||
|
|
|
@ -7,7 +7,8 @@ import static org.junit.Assert.*;
|
|||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.util.LinkedList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import org.junit.Test;
|
||||
|
@ -17,7 +18,7 @@ public class SubRipReaderTest {
|
|||
|
||||
@Test
|
||||
public void parse() throws Exception {
|
||||
LinkedList<SubtitleElement> list = new LinkedList<SubtitleElement>();
|
||||
List<SubtitleElement> list = new ArrayList<SubtitleElement>();
|
||||
|
||||
URL resource = new URL("http://www.opensubtitles.org/en/download/file/1951733951.gz");
|
||||
InputStream source = new GZIPInputStream(resource.openStream());
|
||||
|
@ -32,12 +33,12 @@ public class SubRipReaderTest {
|
|||
reader.close();
|
||||
}
|
||||
|
||||
assertEquals(499, list.size(), 0);
|
||||
assertEquals(501, list.size(), 0);
|
||||
|
||||
assertEquals(3455, list.getFirst().getStart(), 0);
|
||||
assertEquals(6799, list.getFirst().getEnd(), 0);
|
||||
assertEquals(3455, list.get(0).getStart(), 0);
|
||||
assertEquals(6799, list.get(0).getEnd(), 0);
|
||||
|
||||
assertEquals("Come with me if you want to live.", list.get(253).getText());
|
||||
assertEquals("Come with me if you want to live.", list.get(254).getText());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ public class IMDbClientTest {
|
|||
assertEquals(2004, movie.getYear());
|
||||
assertEquals(407362, movie.getImdbId(), 0);
|
||||
|
||||
assertEquals(7, results.size(), 0);
|
||||
assertEquals(8, results.size(), 0);
|
||||
}
|
||||
|
||||
|
||||
|
@ -92,7 +92,7 @@ public class IMDbClientTest {
|
|||
|
||||
Episode first = list.get(0);
|
||||
|
||||
assertEquals("Mushishi", first.getSeriesName());
|
||||
assertEquals("Mushi-Shi", first.getSeriesName());
|
||||
assertEquals("Midori no za", first.getTitle());
|
||||
assertEquals("1", first.getEpisode());
|
||||
assertEquals("1", first.getSeason());
|
||||
|
|
|
@ -39,7 +39,7 @@ public class OpenSubtitlesXmlRpcTest {
|
|||
MovieDescriptor sample = (MovieDescriptor) list.get(0);
|
||||
|
||||
// check sample entry
|
||||
assertEquals("\"Babylon 5\" (1994) (TV series)", sample.getName());
|
||||
assertEquals("\"Babylon 5\" (1994)", sample.getName());
|
||||
assertEquals(105946, sample.getImdbId());
|
||||
}
|
||||
|
||||
|
@ -182,7 +182,7 @@ public class OpenSubtitlesXmlRpcTest {
|
|||
ByteBuffer data = list.get(0).fetch();
|
||||
|
||||
// check size
|
||||
assertEquals(48550, data.remaining(), 0);
|
||||
assertEquals(48707, data.remaining(), 0);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ package net.sourceforge.filebot.web;
|
|||
import static org.junit.Assert.*;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -98,4 +99,16 @@ public class SubsceneSubtitleClientTest {
|
|||
assertEquals(twinpeaksSearchResult.getURL().toString(), subscene.getSubtitleListLink(twinpeaksSearchResult, null).toURL().toString());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void downloadSubtitleArchive() throws Exception {
|
||||
SearchResult selectedResult = subscene.search("firefly").get(0);
|
||||
SubtitleDescriptor subtitleDescriptor = subscene.getSubtitleList(selectedResult, "English").get(1);
|
||||
|
||||
assertEquals(subtitleDescriptor.getName(), "Firefly - The Complete Series");
|
||||
|
||||
ByteBuffer archive = subtitleDescriptor.fetch();
|
||||
assertEquals(254549, archive.remaining());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,8 +17,8 @@ public class TMDbClientTest {
|
|||
|
||||
@Test
|
||||
public void searchByName() throws Exception {
|
||||
List<MovieDescriptor> result = tmdb.searchMovie("transformers");
|
||||
MovieDescriptor movie = result.get(0);
|
||||
List<MovieDescriptor> result = tmdb.searchMovie("Transformers");
|
||||
MovieDescriptor movie = result.get(1);
|
||||
|
||||
assertEquals("Transformers", movie.getName());
|
||||
assertEquals(2007, movie.getYear());
|
||||
|
|
|
@ -107,7 +107,7 @@ public class TVDotComClientTest {
|
|||
public void getEpisodeListEncoding() throws Exception {
|
||||
List<Episode> list = tvdotcom.getEpisodeList(tvdotcom.search("Lost").get(0), 3);
|
||||
|
||||
Episode episode = list.get(16);
|
||||
Episode episode = list.get(13);
|
||||
|
||||
assertEquals("Lost", episode.getSeriesName());
|
||||
assertEquals("Exposé", episode.getTitle());
|
||||
|
|
|
@ -40,7 +40,7 @@ public class TheTVDBClientTest {
|
|||
public void searchGerman() throws Exception {
|
||||
List<SearchResult> results = thetvdb.search("buffy", Locale.GERMAN);
|
||||
|
||||
assertEquals(3, results.size());
|
||||
assertEquals(4, results.size());
|
||||
|
||||
TheTVDBSearchResult first = (TheTVDBSearchResult) results.get(0);
|
||||
|
||||
|
@ -70,9 +70,9 @@ public class TheTVDBClientTest {
|
|||
// check special episode
|
||||
Episode last = list.get(list.size() - 1);
|
||||
assertEquals("Buffy the Vampire Slayer", last.getSeriesName());
|
||||
assertEquals("Season 5 Overview", last.getTitle());
|
||||
assertEquals("Special 17", last.getEpisode());
|
||||
assertEquals("0", last.getSeason());
|
||||
assertEquals("Unaired Pilot", last.getTitle());
|
||||
assertEquals("Special 1", last.getEpisode());
|
||||
assertEquals("1", last.getSeason());
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue