* fix subscene scraper

This commit is contained in:
Reinhard Pointner 2013-01-15 11:28:19 +00:00
parent 6384e97b64
commit e3ba7b79e3
3 changed files with 19 additions and 32 deletions

View File

@ -2,17 +2,15 @@
package net.sourceforge.filebot.web; package net.sourceforge.filebot.web;
import static net.sourceforge.filebot.web.WebRequest.*; import static java.util.Collections.*;
import static net.sourceforge.tuned.XPathUtilities.*; import static net.sourceforge.tuned.XPathUtilities.*;
import java.net.HttpURLConnection; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Node; import org.xml.sax.SAXException;
public class SubsceneSubtitleDescriptor implements SubtitleDescriptor { public class SubsceneSubtitleDescriptor implements SubtitleDescriptor {
@ -21,7 +19,6 @@ public class SubsceneSubtitleDescriptor implements SubtitleDescriptor {
private String language; private String language;
private URL subtitlePage; private URL subtitlePage;
private Map<String, String> subtitleInfo;
public SubsceneSubtitleDescriptor(String title, String language, URL subtitlePage) { public SubsceneSubtitleDescriptor(String title, String language, URL subtitlePage) {
@ -51,30 +48,14 @@ public class SubsceneSubtitleDescriptor implements SubtitleDescriptor {
@Override @Override
public ByteBuffer fetch() throws Exception { public ByteBuffer fetch() throws Exception {
URL downloadLink = new URL(subtitlePage.getProtocol(), subtitlePage.getHost(), "/subtitle/download"); return WebRequest.fetch(getDownloadLink(), 0, singletonMap("Referer", subtitlePage.toString()));
HttpURLConnection connection = (HttpURLConnection) downloadLink.openConnection();
connection.addRequestProperty("Referer", subtitlePage.toString());
return WebRequest.post(connection, getSubtitleInfo());
} }
private synchronized Map<String, String> getSubtitleInfo() { private URL getDownloadLink() throws IOException, SAXException {
// extract subtitle information from subtitle page if necessary Document page = WebRequest.getHtmlDocument(subtitlePage);
if (subtitleInfo == null) { String file = selectString("id('downloadButton')/@href", page);
subtitleInfo = new HashMap<String, String>(); return new URL(subtitlePage.getProtocol(), subtitlePage.getHost(), file);
try {
Document dom = getHtmlDocument(subtitlePage);
for (Node input : selectNodes("id('dl')//INPUT[@name]", dom)) {
subtitleInfo.put(getAttribute("name", input), getAttribute("value", input));
}
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException("Failed to extract subtitle info", e);
}
}
return subtitleInfo;
} }

View File

@ -133,7 +133,9 @@ public final class WebRequest {
public static ByteBuffer fetch(URL url, long ifModifiedSince, Map<String, String> requestParameters) throws IOException { public static ByteBuffer fetch(URL url, long ifModifiedSince, Map<String, String> requestParameters) throws IOException {
URLConnection connection = url.openConnection(); URLConnection connection = url.openConnection();
connection.setIfModifiedSince(ifModifiedSince); if (ifModifiedSince > 0) {
connection.setIfModifiedSince(ifModifiedSince);
}
if (requestParameters != null) { if (requestParameters != null) {
for (Entry<String, String> parameter : requestParameters.entrySet()) { for (Entry<String, String> parameter : requestParameters.entrySet()) {

View File

@ -9,6 +9,8 @@ import java.nio.ByteBuffer;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import net.sourceforge.filebot.vfs.ArchiveType;
import net.sourceforge.filebot.vfs.MemoryFile;
import net.sourceforge.filebot.web.SubsceneSubtitleClient.SubsceneSearchResult; import net.sourceforge.filebot.web.SubsceneSubtitleClient.SubsceneSearchResult;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -50,7 +52,7 @@ public class SubsceneSubtitleClientTest {
@Test @Test
public void search2() throws Exception { public void search2() throws Exception {
List<SearchResult> results = subscene.search("Avatar 2009"); List<SearchResult> results = subscene.search("firefly");
SubsceneSearchResult result = (SubsceneSearchResult) results.get(0); SubsceneSearchResult result = (SubsceneSearchResult) results.get(0);
assertEquals("Firefly - The Complete Series (2002)", result.toString()); assertEquals("Firefly - The Complete Series (2002)", result.toString());
@ -100,10 +102,12 @@ public class SubsceneSubtitleClientTest {
public void downloadSubtitleArchive() throws Exception { public void downloadSubtitleArchive() throws Exception {
SearchResult selectedResult = subscene.search("firefly").get(0); SearchResult selectedResult = subscene.search("firefly").get(0);
SubtitleDescriptor subtitleDescriptor = subscene.getSubtitleList(selectedResult, "English").get(0); SubtitleDescriptor subtitleDescriptor = subscene.getSubtitleList(selectedResult, "English").get(0);
assertEquals("Firefly.S01E00-13.DVDRip-Rogue.eng-RETAIL", subtitleDescriptor.getName()); assertEquals("Firefly The Complete Series", subtitleDescriptor.getName());
ByteBuffer archive = subtitleDescriptor.fetch(); ByteBuffer data = subtitleDescriptor.fetch();
assertEquals(254549, archive.remaining()); Iterable<MemoryFile> archive = ArchiveType.RAR.fromData(data);
MemoryFile file = archive.iterator().next();
assertEquals("Firefly - 1x01 - Serenity.srt", file.getName());
} }
} }