Merge branch 'subtree'
[fanfix.git] / src / be / nikiroo / utils / Downloader.java
index e01ec1dcc24cb3abccc28bb3618b62edf0ad7a54..4191d0aea0da85511e2741c50c2a6c83806f612f 100644 (file)
@@ -29,6 +29,8 @@ public class Downloader {
        private String UA;
        private CookieManager cookies;
        private TraceHandler tracer = new TraceHandler();
+       private Cache cache;
+       private boolean offline;
 
        /**
         * Create a new {@link Downloader}.
@@ -37,14 +39,56 @@ public class Downloader {
         *            the User-Agent to use to download the resources -- note that
         *            some websites require one, some actively blacklist real UAs
         *            like the one from wget, some whitelist a couple of browsers
-        *            only (!)
+        *            only (!) -- can be NULL
         */
        public Downloader(String UA) {
+               this(UA, null);
+       }
+
+       /**
+        * Create a new {@link Downloader}.
+        * 
+        * @param UA
+        *            the User-Agent to use to download the resources -- note that
+        *            some websites require one, some actively blacklist real UAs
+        *            like the one from wget, some whitelist a couple of browsers
+        *            only (!) -- can be NULL
+        * @param cache
+        *            the {@link Cache} to use for all access (can be NULL)
+        */
+       public Downloader(String UA, Cache cache) {
                this.UA = UA;
 
-               cookies = new CookieManager();
-               cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
+               cookies = new CookieManager(null, CookiePolicy.ACCEPT_ALL);
                CookieHandler.setDefault(cookies);
+
+               setCache(cache);
+       }
+       
+       /**
+        * This {@link Downloader} is forbidden to try and connect to the network.
+        * <p>
+        * If TRUE, it will only check the cache if any.
+        * <p>
+        * Default is FALSE.
+        * 
+        * @return TRUE if offline
+        */
+       public boolean isOffline() {
+               return offline;
+       }
+       
+       /**
+        * This {@link Downloader} is forbidden to try and connect to the network.
+        * <p>
+        * If TRUE, it will only check the cache if any.
+        * <p>
+        * Default is FALSE.
+        * 
+        * @param offline TRUE for offline, FALSE for online
+        */
+       public void setOffline(boolean offline) {
+               this.offline = offline;
        }
 
        /**
@@ -70,6 +114,25 @@ public class Downloader {
                this.tracer = tracer;
        }
 
+       /**
+        * The {@link Cache} to use for all access (can be NULL).
+        * 
+        * @return the cache
+        */
+       public Cache getCache() {
+               return cache;
+       }
+
+       /**
+        * The {@link Cache} to use for all access (can be NULL).
+        * 
+        * @param cache
+        *            the new cache
+        */
+       public void setCache(Cache cache) {
+               this.cache = cache;
+       }
+
        /**
         * Clear all the cookies currently in the jar.
         * <p>
@@ -90,7 +153,26 @@ public class Downloader {
         *             in case of I/O error
         **/
        public InputStream open(URL url) throws IOException {
-               return open(url, url, url, null, null, null, null);
+               return open(url, false);
+       }
+
+       /**
+        * Open the given {@link URL} and update the cookies.
+        * 
+        * @param url
+        *            the {@link URL} to open
+        * @param stable
+        *            a stable file (that doesn't change too often) --
+        *            parameter used to check if the file is too old to keep or not
+        *            in the cache (default is false)
+        * 
+        * @return the {@link InputStream} of the opened page
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        **/
+       public InputStream open(URL url, boolean stable) throws IOException {
+               return open(url, url, url, null, null, null, null, stable);
        }
 
        /**
@@ -112,13 +194,46 @@ public class Downloader {
         * @return the {@link InputStream} of the opened page
         * 
         * @throws IOException
-        *             in case of I/O error
+        *             in case of I/O error (including offline mode + not in cache)
         */
        public InputStream open(URL url, URL currentReferer,
                        Map<String, String> cookiesValues, Map<String, String> postParams,
                        Map<String, String> getParams, String oauth) throws IOException {
+               return open(url, currentReferer, cookiesValues, postParams, getParams,
+                               oauth, false);
+       }
+
+       /**
+        * Open the given {@link URL} and update the cookies.
+        * 
+        * @param url
+        *            the {@link URL} to open
+        * @param currentReferer
+        *            the current referer, for websites that need this info
+        * @param cookiesValues
+        *            the cookies
+        * @param postParams
+        *            the POST parameters
+        * @param getParams
+        *            the GET parameters (priority over POST)
+        * @param oauth
+        *            OAuth authorization (aka, "bearer XXXXXXX")
+        * @param stable
+        *            a stable file (that doesn't change too often) --
+        *            parameter used to check if the file is too old to keep or not
+        *            in the cache (default is false)
+        * 
+        * @return the {@link InputStream} of the opened page
+        * 
+        * @throws IOException
+        *             in case of I/O error (including offline mode + not in cache)
+        */
+       public InputStream open(URL url, URL currentReferer,
+                       Map<String, String> cookiesValues, Map<String, String> postParams,
+                       Map<String, String> getParams, String oauth, boolean stable)
+                       throws IOException {
                return open(url, url, currentReferer, cookiesValues, postParams,
-                               getParams, oauth);
+                               getParams, oauth, stable);
        }
 
        /**
@@ -127,23 +242,52 @@ public class Downloader {
         * @param url
         *            the {@link URL} to open
         * @param originalUrl
-        *            the original {@link URL} before any redirection occurs
+        *            the original {@link URL} before any redirection occurs, which
+        *            is also used for the cache ID if needed (so we can retrieve
+        *            the content with this URL if needed)
+        * @param currentReferer
+        *            the current referer, for websites that need this info
+        * @param cookiesValues
+        *            the cookies
         * @param postParams
         *            the POST parameters
         * @param getParams
         *            the GET parameters (priority over POST)
         * @param oauth
         *            OAuth authorisation (aka, "bearer XXXXXXX")
+        * @param stable
+        *            a stable file (that doesn't change too often) -- parameter
+        *            used to check if the file is too old to keep or not in the
+        *            cache
+        * 
         * @return the {@link InputStream} of the opened page
         * 
         * @throws IOException
-        *             in case of I/O error
+        *             in case of I/O error (including offline mode + not in cache)
         */
-       private InputStream open(URL url, final URL originalUrl,
-                       URL currentReferer, Map<String, String> cookiesValues,
-                       Map<String, String> postParams, Map<String, String> getParams,
-                       String oauth) throws IOException {
+       public InputStream open(URL url, final URL originalUrl, URL currentReferer,
+                       Map<String, String> cookiesValues, Map<String, String> postParams,
+                       Map<String, String> getParams, String oauth, boolean stable)
+                       throws IOException {
+
+               tracer.trace("Request: " + url);
 
+               if (cache != null) {
+                       InputStream in = cache.load(originalUrl, false, stable);
+                       if (in != null) {
+                               tracer.trace("Use the cache: " + url);
+                               tracer.trace("Original URL : " + originalUrl);
+                               return in;
+                       }
+               }
+
+               String protocol = originalUrl == null ? null : originalUrl
+                               .getProtocol();
+               if (isOffline() && !"file".equalsIgnoreCase(protocol)) {
+                       tracer.error("Downloader OFFLINE, cannot proceed to URL: " + url);
+                       throw new IOException("Downloader is currently OFFLINE, cannot download: " + url);
+               }
+               
                tracer.trace("Download: " + url);
 
                URLConnection conn = openConnectionWithCookies(url, currentReferer,
@@ -155,9 +299,9 @@ public class Downloader {
                        params = postParams;
                }
 
+               StringBuilder requestData = null;
                if ((params != null || oauth != null)
                                && conn instanceof HttpURLConnection) {
-                       StringBuilder requestData = null;
                        if (params != null) {
                                requestData = new StringBuilder();
                                for (Map.Entry<String, String> param : params.entrySet()) {
@@ -170,15 +314,14 @@ public class Downloader {
                                                        String.valueOf(param.getValue()), "UTF-8"));
                                }
 
-                               conn.setDoOutput(true);
-
                                if (getParams == null && postParams != null) {
                                        ((HttpURLConnection) conn).setRequestMethod("POST");
                                }
 
                                conn.setRequestProperty("Content-Type",
                                                "application/x-www-form-urlencoded");
-                               conn.setRequestProperty("charset", "utf-8");
+                               conn.setRequestProperty("Content-Length",
+                                               Integer.toString(requestData.length()));
                        }
 
                        if (oauth != null) {
@@ -186,31 +329,86 @@ public class Downloader {
                        }
 
                        if (requestData != null) {
+                               conn.setDoOutput(true);
                                OutputStreamWriter writer = new OutputStreamWriter(
                                                conn.getOutputStream());
-
-                               writer.write(requestData.toString());
-                               writer.flush();
-                               writer.close();
+                               try {
+                                       writer.write(requestData.toString());
+                                       writer.flush();
+                               } finally {
+                                       writer.close();
+                               }
                        }
                }
 
+               // Manual redirection, much better for POST data
+               if (conn instanceof HttpURLConnection) {
+                       ((HttpURLConnection) conn).setInstanceFollowRedirects(false);
+               }
+
                conn.connect();
 
                // Check if redirect
-               if (conn instanceof HttpURLConnection
-                               && ((HttpURLConnection) conn).getResponseCode() / 100 == 3) {
-                       String newUrl = conn.getHeaderField("Location");
-                       return open(new URL(newUrl), originalUrl, currentReferer,
-                                       cookiesValues, postParams, getParams, oauth);
-               }
+               // BEWARE! POST data cannot be redirected (some webservers complain) for
+               // HTTP codes 302 and 303
+               if (conn instanceof HttpURLConnection) {
+                       int repCode = 0;
+                       try {
+                               // Can fail in some circumstances
+                               repCode = ((HttpURLConnection) conn).getResponseCode();
+                       } catch (IOException e) {
+                       }
 
-               InputStream in = conn.getInputStream();
-               if ("gzip".equals(conn.getContentEncoding())) {
-                       in = new GZIPInputStream(in);
+                       if (repCode / 100 == 3) {
+                               String newUrl = conn.getHeaderField("Location");
+                               return open(new URL(newUrl), originalUrl, currentReferer,
+                                               cookiesValues, //
+                                               (repCode == 302 || repCode == 303) ? null : postParams, //
+                                               getParams, oauth, stable);
+                       }
                }
 
-               return in;
+               try {
+                       InputStream in = conn.getInputStream();
+                       if ("gzip".equals(conn.getContentEncoding())) {
+                               in = new GZIPInputStream(in);
+                       }
+
+                       if (in == null) {
+                               throw new IOException("No InputStream!");
+                       }
+
+                       if (cache != null) {
+                               String size = conn.getContentLength() < 0 ? "unknown size"
+                                               : StringUtils.formatNumber(conn.getContentLength())
+                                                               + "bytes";
+                               tracer.trace("Save to cache (" + size + "): " + originalUrl);
+                               try {
+                                       try {
+                                               long bytes = cache.save(in, originalUrl);
+                                               tracer.trace("Saved to cache: "
+                                                               + StringUtils.formatNumber(bytes) + "bytes");
+                                       } finally {
+                                               in.close();
+                                       }
+                                       in = cache.load(originalUrl, true, true);
+                               } catch (IOException e) {
+                                       tracer.error(new IOException(
+                                                       "Cannot save URL to cache, will ignore cache: "
+                                                                       + url, e));
+                               }
+                       }
+
+                       if (in == null) {
+                               throw new IOException(
+                                               "Cannot retrieve the file after storing it in the cache (??)");
+                       }
+                       
+                       return in;
+               } catch (IOException e) {
+                       throw new IOException(String.format(
+                                       "Cannot find %s (current URL: %s)", originalUrl, url), e);
+               }
        }
 
        /**
@@ -230,9 +428,18 @@ public class Downloader {
                        throws IOException {
                URLConnection conn = url.openConnection();
 
-               conn.setRequestProperty("User-Agent", UA);
-               conn.setRequestProperty("Cookie", generateCookies(cookiesValues));
+               String cookies = generateCookies(cookiesValues);
+               if (cookies != null && !cookies.isEmpty()) {
+                       conn.setRequestProperty("Cookie", cookies);
+               }
+
+               if (UA != null) {
+                       conn.setRequestProperty("User-Agent", UA);
+               }
                conn.setRequestProperty("Accept-Encoding", "gzip");
+               conn.setRequestProperty("Accept", "*/*");
+               conn.setRequestProperty("Charset", "utf-8");
+
                if (currentReferer != null) {
                        conn.setRequestProperty("Referer", currentReferer.toString());
                        conn.setRequestProperty("Host", currentReferer.getHost());
@@ -254,7 +461,6 @@ public class Downloader {
                                builder.append(';');
                        }
 
-                       // TODO: check if format is ok
                        builder.append(cookie.toString());
                }