From e3ba7b79e376c8af4636ee321e5698a00e3fdbf9 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Tue, 15 Jan 2013 11:28:19 +0000 Subject: [PATCH] * fix subscene scraper --- .../web/SubsceneSubtitleDescriptor.java | 35 +++++-------------- .../sourceforge/filebot/web/WebRequest.java | 4 ++- .../web/SubsceneSubtitleClientTest.java | 12 ++++--- 3 files changed, 19 insertions(+), 32 deletions(-) diff --git a/source/net/sourceforge/filebot/web/SubsceneSubtitleDescriptor.java b/source/net/sourceforge/filebot/web/SubsceneSubtitleDescriptor.java index 6f0c97ee..b81d1494 100644 --- a/source/net/sourceforge/filebot/web/SubsceneSubtitleDescriptor.java +++ b/source/net/sourceforge/filebot/web/SubsceneSubtitleDescriptor.java @@ -2,17 +2,15 @@ package net.sourceforge.filebot.web; -import static net.sourceforge.filebot.web.WebRequest.*; +import static java.util.Collections.*; import static net.sourceforge.tuned.XPathUtilities.*; -import java.net.HttpURLConnection; +import java.io.IOException; import java.net.URL; import java.nio.ByteBuffer; -import java.util.HashMap; -import java.util.Map; import org.w3c.dom.Document; -import org.w3c.dom.Node; +import org.xml.sax.SAXException; public class SubsceneSubtitleDescriptor implements SubtitleDescriptor { @@ -21,7 +19,6 @@ public class SubsceneSubtitleDescriptor implements SubtitleDescriptor { private String language; private URL subtitlePage; - private Map subtitleInfo; public SubsceneSubtitleDescriptor(String title, String language, URL subtitlePage) { @@ -51,30 +48,14 @@ public class SubsceneSubtitleDescriptor implements SubtitleDescriptor { @Override public ByteBuffer fetch() throws Exception { - URL downloadLink = new URL(subtitlePage.getProtocol(), subtitlePage.getHost(), "/subtitle/download"); - - HttpURLConnection connection = (HttpURLConnection) downloadLink.openConnection(); - connection.addRequestProperty("Referer", subtitlePage.toString()); - - return WebRequest.post(connection, getSubtitleInfo()); + return WebRequest.fetch(getDownloadLink(), 0, singletonMap("Referer", subtitlePage.toString())); } - private synchronized Map getSubtitleInfo() { - // extract subtitle information from subtitle page if necessary - if (subtitleInfo == null) { - subtitleInfo = new HashMap(); - try { - Document dom = getHtmlDocument(subtitlePage); - for (Node input : selectNodes("id('dl')//INPUT[@name]", dom)) { - subtitleInfo.put(getAttribute("name", input), getAttribute("value", input)); - } - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException("Failed to extract subtitle info", e); - } - } - return subtitleInfo; + private URL getDownloadLink() throws IOException, SAXException { + Document page = WebRequest.getHtmlDocument(subtitlePage); + String file = selectString("id('downloadButton')/@href", page); + return new URL(subtitlePage.getProtocol(), subtitlePage.getHost(), file); } diff --git a/source/net/sourceforge/filebot/web/WebRequest.java b/source/net/sourceforge/filebot/web/WebRequest.java index 31793d6a..0d99102e 100644 --- a/source/net/sourceforge/filebot/web/WebRequest.java +++ b/source/net/sourceforge/filebot/web/WebRequest.java @@ -133,7 +133,9 @@ public final class WebRequest { public static ByteBuffer fetch(URL url, long ifModifiedSince, Map requestParameters) throws IOException { URLConnection connection = url.openConnection(); - connection.setIfModifiedSince(ifModifiedSince); + if (ifModifiedSince > 0) { + connection.setIfModifiedSince(ifModifiedSince); + } if (requestParameters != null) { for (Entry parameter : requestParameters.entrySet()) { diff --git a/test/net/sourceforge/filebot/web/SubsceneSubtitleClientTest.java b/test/net/sourceforge/filebot/web/SubsceneSubtitleClientTest.java index 57e7d10a..2ccc976e 100644 --- a/test/net/sourceforge/filebot/web/SubsceneSubtitleClientTest.java +++ b/test/net/sourceforge/filebot/web/SubsceneSubtitleClientTest.java @@ -9,6 +9,8 @@ import java.nio.ByteBuffer; import java.util.List; import java.util.Map; +import net.sourceforge.filebot.vfs.ArchiveType; +import net.sourceforge.filebot.vfs.MemoryFile; import net.sourceforge.filebot.web.SubsceneSubtitleClient.SubsceneSearchResult; import org.junit.BeforeClass; @@ -50,7 +52,7 @@ public class SubsceneSubtitleClientTest { @Test public void search2() throws Exception { - List results = subscene.search("Avatar 2009"); + List results = subscene.search("firefly"); SubsceneSearchResult result = (SubsceneSearchResult) results.get(0); assertEquals("Firefly - The Complete Series (2002)", result.toString()); @@ -100,10 +102,12 @@ public class SubsceneSubtitleClientTest { public void downloadSubtitleArchive() throws Exception { SearchResult selectedResult = subscene.search("firefly").get(0); SubtitleDescriptor subtitleDescriptor = subscene.getSubtitleList(selectedResult, "English").get(0); - assertEquals("Firefly.S01E00-13.DVDRip-Rogue.eng-RETAIL", subtitleDescriptor.getName()); + assertEquals("Firefly The Complete Series", subtitleDescriptor.getName()); - ByteBuffer archive = subtitleDescriptor.fetch(); - assertEquals(254549, archive.remaining()); + ByteBuffer data = subtitleDescriptor.fetch(); + Iterable archive = ArchiveType.RAR.fromData(data); + MemoryFile file = archive.iterator().next(); + assertEquals("Firefly - 1x01 - Serenity.srt", file.getName()); } }