From f6e4f1bb8fe8ec7ddca73a92fcb81881ecbbf43f Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Wed, 16 Oct 2013 10:29:51 +0000 Subject: [PATCH] * optimize TheTVDB network requests (and make sure gzipped compression is used at all times) --- .../filebot/web/AbstractCachedResource.java | 4 + .../filebot/web/TheTVDBClient.java | 68 ++-------- .../sourceforge/filebot/web/WebRequest.java | 128 ++++++++---------- 3 files changed, 71 insertions(+), 129 deletions(-) diff --git a/source/net/sourceforge/filebot/web/AbstractCachedResource.java b/source/net/sourceforge/filebot/web/AbstractCachedResource.java index 8e3d8105..d521f84c 100644 --- a/source/net/sourceforge/filebot/web/AbstractCachedResource.java +++ b/source/net/sourceforge/filebot/web/AbstractCachedResource.java @@ -1,5 +1,6 @@ package net.sourceforge.filebot.web; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; import java.net.URL; @@ -124,6 +125,9 @@ public abstract class AbstractCachedResource { Thread.sleep(retryWaitTime); } return fetchData(url, lastModified); + } catch (FileNotFoundException e) { + // if the resource doesn't exist no need for retries + throw e; } catch (IOException e) { if (i >= 0 && i >= retries) { throw e; diff --git a/source/net/sourceforge/filebot/web/TheTVDBClient.java b/source/net/sourceforge/filebot/web/TheTVDBClient.java index 3adac03c..89500f66 100644 --- a/source/net/sourceforge/filebot/web/TheTVDBClient.java +++ b/source/net/sourceforge/filebot/web/TheTVDBClient.java @@ -3,17 +3,14 @@ package net.sourceforge.filebot.web; import static java.util.Arrays.*; import static net.sourceforge.filebot.web.EpisodeUtilities.*; import static net.sourceforge.filebot.web.WebRequest.*; -import static net.sourceforge.tuned.FileUtilities.*; import static net.sourceforge.tuned.XPathUtilities.*; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStreamReader; import java.io.Serializable; import java.net.MalformedURLException; import java.net.URI; import java.net.URL; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.EnumMap; import java.util.EnumSet; @@ -25,8 +22,6 @@ import java.util.Map.Entry; import java.util.Random; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; import javax.swing.Icon; @@ -34,7 +29,6 @@ import net.sourceforge.filebot.Cache; import net.sourceforge.filebot.ResourceManager; import net.sourceforge.filebot.web.TheTVDBClient.BannerDescriptor.BannerProperty; import net.sourceforge.filebot.web.TheTVDBClient.SeriesInfo.SeriesProperty; -import net.sourceforge.tuned.ByteBufferInputStream; import net.sourceforge.tuned.FileUtilities; import org.w3c.dom.Document; @@ -124,13 +118,13 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { @Override public List fetchEpisodeList(SearchResult searchResult, SortOrder sortOrder, Locale locale) throws Exception { TheTVDBSearchResult series = (TheTVDBSearchResult) searchResult; - Document seriesRecord = getSeriesRecord(series, getLanguageCode(locale)); + Document dom = getXmlResource(MirrorType.XML, "/api/" + apikey + "/series/" + series.getSeriesId() + "/all/" + locale.getLanguage() + ".xml"); // we could get the series name from the search result, but the language may not match the given parameter - String seriesName = selectString("Data/Series/SeriesName", seriesRecord); - Date seriesStartDate = Date.parse(selectString("Data/Series/FirstAired", seriesRecord), "yyyy-MM-dd"); + String seriesName = selectString("Data/Series/SeriesName", dom); + Date seriesStartDate = Date.parse(selectString("Data/Series/FirstAired", dom), "yyyy-MM-dd"); - List nodes = selectNodes("Data/Episode", seriesRecord); + List nodes = selectNodes("Data/Episode", dom); List episodes = new ArrayList(nodes.size()); List specials = new ArrayList(5); @@ -185,54 +179,6 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { return episodes; } - public Document getSeriesRecord(final TheTVDBSearchResult searchResult, final String languageCode) throws Exception { - final String path = "/api/" + apikey + "/series/" + searchResult.getSeriesId() + "/all/" + languageCode + ".zip"; - final MirrorType mirror = MirrorType.ZIP; - - CachedXmlResource record = new CachedXmlResource(path) { - @Override - protected URL getResourceLocation(String resource) throws IOException { - return getResourceURL(mirror, path); - } - - @Override - protected String fetchData(URL url, long lastModified) throws IOException { - try { - ByteBuffer data = WebRequest.fetchIfModified(url, lastModified); - if (data == null) - return null; // not modified - - ZipInputStream zipInputStream = new ZipInputStream(new ByteBufferInputStream(data)); - ZipEntry zipEntry; - - try { - String seriesRecordName = languageCode + ".xml"; - - while ((zipEntry = zipInputStream.getNextEntry()) != null) { - if (seriesRecordName.equals(zipEntry.getName())) { - return readAll(new InputStreamReader(zipInputStream, "UTF-8")); - } - } - - // zip file must contain the series record - throw new FileNotFoundException(String.format("Archive must contain %s: %s", seriesRecordName, getResourceURL(mirror, path))); - } finally { - zipInputStream.close(); - } - } catch (FileNotFoundException e) { - throw new FileNotFoundException(String.format("Series record not found: %s [%s]: %s", searchResult.getName(), languageCode, getResourceURL(mirror, path))); - } - } - - @Override - public String process(String data) throws Exception { - return data; - } - }; - - return record.getDocument(); - } - public TheTVDBSearchResult lookupByID(int id, Locale locale) throws Exception { TheTVDBSearchResult cachedItem = getCache().getData("lookupByID", id, locale, TheTVDBSearchResult.class); if (cachedItem != null) { @@ -342,7 +288,11 @@ public class TheTVDBClient extends AbstractEpisodeListProvider { }; // fetch data or retrieve from cache - return resource.getDocument(); + try { + return resource.getDocument(); + } catch (FileNotFoundException e) { + throw new FileNotFoundException("Resource not found: " + getResourceURL(mirrorType, path)); // simplify error message + } } protected URL getResourceURL(MirrorType mirrorType, String path) throws IOException { diff --git a/source/net/sourceforge/filebot/web/WebRequest.java b/source/net/sourceforge/filebot/web/WebRequest.java index 0d99102e..52ba31c0 100644 --- a/source/net/sourceforge/filebot/web/WebRequest.java +++ b/source/net/sourceforge/filebot/web/WebRequest.java @@ -1,7 +1,5 @@ - package net.sourceforge.filebot.web; - import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -42,19 +40,16 @@ import org.w3c.dom.Document; import org.xml.sax.InputSource; import org.xml.sax.SAXException; - public final class WebRequest { - + public static Document getHtmlDocument(URL url) throws IOException, SAXException { return getHtmlDocument(url.openConnection()); } - - + public static Document getHtmlDocument(URLConnection connection) throws IOException, SAXException { return getHtmlDocument(getReader(connection)); } - - + public static Reader getReader(URLConnection connection) throws IOException { try { connection.addRequestProperty("Accept-Encoding", "gzip,deflate"); @@ -62,51 +57,45 @@ public final class WebRequest { } catch (IllegalStateException e) { // too bad, can't request gzipped document anymore } - + Charset charset = getCharset(connection.getContentType()); String encoding = connection.getContentEncoding(); - + InputStream inputStream = connection.getInputStream(); - + if ("gzip".equalsIgnoreCase(encoding)) inputStream = new GZIPInputStream(inputStream); else if ("deflate".equalsIgnoreCase(encoding)) { inputStream = new InflaterInputStream(inputStream, new Inflater(true)); } - + return new InputStreamReader(inputStream, charset); } - - + public static Document getHtmlDocument(Reader reader) throws SAXException, IOException { DOMParser parser = new DOMParser(); parser.setFeature("http://xml.org/sax/features/namespaces", false); parser.parse(new InputSource(reader)); - + return parser.getDocument(); } - - + public static Document getHtmlDocument(String html) throws SAXException, IOException { return getHtmlDocument(new StringReader(html)); } - - + public static Document getDocument(URL url) throws IOException, SAXException { return getDocument(url.openConnection()); } - - + public static Document getDocument(URLConnection connection) throws IOException, SAXException { return getDocument(new InputSource(getReader(connection))); } - - + public static Document getDocument(String xml) throws IOException, SAXException { return getDocument(new InputSource(new StringReader(xml))); } - - + public static Document getDocument(InputSource source) throws IOException, SAXException { try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); @@ -119,40 +108,48 @@ public final class WebRequest { throw new RuntimeException(e); } } - - + public static ByteBuffer fetch(URL resource) throws IOException { return fetch(resource, 0, null); } - - + public static ByteBuffer fetchIfModified(URL resource, long ifModifiedSince) throws IOException { return fetch(resource, ifModifiedSince, null); } - - + public static ByteBuffer fetch(URL url, long ifModifiedSince, Map requestParameters) throws IOException { URLConnection connection = url.openConnection(); if (ifModifiedSince > 0) { connection.setIfModifiedSince(ifModifiedSince); } - + + try { + connection.addRequestProperty("Accept-Encoding", "gzip"); + connection.addRequestProperty("Accept-Charset", "UTF-8"); + } catch (IllegalStateException e) { + // too bad, can't request gzipped data + } + if (requestParameters != null) { for (Entry parameter : requestParameters.entrySet()) { connection.addRequestProperty(parameter.getKey(), parameter.getValue()); } } - + int contentLength = connection.getContentLength(); - + String encoding = connection.getContentEncoding(); + InputStream in = connection.getInputStream(); + if ("gzip".equalsIgnoreCase(encoding)) { + in = new GZIPInputStream(in); + } + ByteBufferOutputStream buffer = new ByteBufferOutputStream(contentLength >= 0 ? contentLength : 4 * 1024); - try { // read all buffer.transferFully(in); } catch (IOException e) { - // if the content length is not known in advance an IOException (Premature EOF) + // if the content length is not known in advance an IOException (Premature EOF) // is always thrown after all the data has been read if (contentLength >= 0) { throw e; @@ -160,48 +157,46 @@ public final class WebRequest { } finally { in.close(); } - + // no data, e.g. If-Modified-Since requests if (contentLength < 0 && buffer.getByteBuffer().remaining() == 0) return null; - + return buffer.getByteBuffer(); } - - + public static ByteBuffer post(HttpURLConnection connection, Map parameters) throws IOException { return post(connection, encodeParameters(parameters, true).getBytes("UTF-8"), "application/x-www-form-urlencoded"); } - - + public static ByteBuffer post(HttpURLConnection connection, byte[] postData, String contentType) throws IOException { connection.addRequestProperty("Content-Length", String.valueOf(postData.length)); connection.addRequestProperty("Content-Type", contentType); connection.setRequestMethod("POST"); connection.setDoOutput(true); - + // write post data OutputStream out = connection.getOutputStream(); out.write(postData); out.close(); - + // read response int contentLength = connection.getContentLength(); String encoding = connection.getContentEncoding(); - + InputStream in = connection.getInputStream(); if ("gzip".equalsIgnoreCase(encoding)) in = new GZIPInputStream(in); else if ("deflate".equalsIgnoreCase(encoding)) { in = new InflaterInputStream(in, new Inflater(true)); } - + ByteBufferOutputStream buffer = new ByteBufferOutputStream(contentLength >= 0 ? contentLength : 32 * 1024); try { // read all buffer.transferFully(in); } catch (IOException e) { - // if the content length is not known in advance an IOException (Premature EOF) + // if the content length is not known in advance an IOException (Premature EOF) // is always thrown after all the data has been read if (contentLength >= 0) { throw e; @@ -209,16 +204,15 @@ public final class WebRequest { } finally { in.close(); } - + return buffer.getByteBuffer(); } - - + private static Charset getCharset(String contentType) { if (contentType != null) { // e.g. Content-Type: text/html; charset=iso-8859-1 Matcher matcher = Pattern.compile("charset=(\\p{Graph}+)").matcher(contentType); - + if (matcher.find()) { try { return Charset.forName(matcher.group(1)); @@ -226,37 +220,35 @@ public final class WebRequest { Logger.getLogger(WebRequest.class.getName()).log(Level.WARNING, e.getMessage()); } } - + // use http default encoding only for text/html if (contentType.equals("text/html")) { return Charset.forName("ISO-8859-1"); } } - + // use UTF-8 if we don't know any better return Charset.forName("UTF-8"); } - - + public static String encodeParameters(Map parameters, boolean unicode) { StringBuilder sb = new StringBuilder(); - + for (Entry entry : parameters.entrySet()) { if (sb.length() > 0) { sb.append("&"); } - + sb.append(entry.getKey()); if (entry.getValue() != null) { sb.append("="); sb.append(encode(entry.getValue().toString(), unicode)); } } - + return sb.toString(); } - - + public static String encode(String string, boolean unicode) { try { return URLEncoder.encode(string, unicode ? "UTF-8" : "ISO-8859-1"); @@ -264,28 +256,25 @@ public final class WebRequest { throw new RuntimeException(e); } } - - + public static SSLSocketFactory createIgnoreCertificateSocketFactory() { // create a trust manager that does not validate certificate chains TrustManager trustAnyCertificate = new X509TrustManager() { - + @Override public X509Certificate[] getAcceptedIssuers() { return null; } - - + @Override public void checkClientTrusted(X509Certificate[] certs, String authType) { } - - + @Override public void checkServerTrusted(X509Certificate[] certs, String authType) { } }; - + try { SSLContext sc = SSLContext.getInstance("SSL"); sc.init(null, new TrustManager[] { trustAnyCertificate }, new SecureRandom()); @@ -294,8 +283,7 @@ public final class WebRequest { throw new RuntimeException(e); } } - - + /** * Dummy constructor to prevent instantiation. */