fix downloader spent input
[nikiroo-utils.git] / src / be / nikiroo / utils / Downloader.java
index e01ec1dcc24cb3abccc28bb3618b62edf0ad7a54..1ec4379a92b355cc1261e0d80d2c057dee0ff8ee 100644 (file)
@@ -29,6 +29,7 @@ public class Downloader {
        private String UA;
        private CookieManager cookies;
        private TraceHandler tracer = new TraceHandler();
+       private Cache cache;
 
        /**
         * Create a new {@link Downloader}.
@@ -40,11 +41,28 @@ public class Downloader {
         *            only (!)
         */
        public Downloader(String UA) {
+               this(UA, null);
+       }
+
+       /**
+        * Create a new {@link Downloader}.
+        * 
+        * @param UA
+        *            the User-Agent to use to download the resources -- note that
+        *            some websites require one, some actively blacklist real UAs
+        *            like the one from wget, some whitelist a couple of browsers
+        *            only (!)
+        * @param cache
+        *            the {@link Cache} to use for all access (can be NULL)
+        */
+       public Downloader(String UA, Cache cache) {
                this.UA = UA;
 
                cookies = new CookieManager();
                cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
                CookieHandler.setDefault(cookies);
+
+               this.cache = cache;
        }
 
        /**
@@ -70,6 +88,25 @@ public class Downloader {
                this.tracer = tracer;
        }
 
+       /**
+        * The {@link Cache} to use for all access (can be NULL).
+        * 
+        * @return the cache
+        */
+       public Cache getCache() {
+               return cache;
+       }
+
+       /**
+        * The {@link Cache} to use for all access (can be NULL).
+        * 
+        * @param cache
+        *            the new cache
+        */
+       public void setCache(Cache cache) {
+               this.cache = cache;
+       }
+
        /**
         * Clear all the cookies currently in the jar.
         * <p>
@@ -90,7 +127,26 @@ public class Downloader {
         *             in case of I/O error
         **/
        public InputStream open(URL url) throws IOException {
-               return open(url, url, url, null, null, null, null);
+               return open(url, false);
+       }
+
+       /**
+        * Open the given {@link URL} and update the cookies.
+        * 
+        * @param url
+        *            the {@link URL} to open
+        * @param stable
+        *            a stable file (that doesn't change too often) --
+        *            parameter used to check if the file is too old to keep or not
+        *            in the cache (default is false)
+        * 
+        * @return the {@link InputStream} of the opened page
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        **/
+       public InputStream open(URL url, boolean stable) throws IOException {
+               return open(url, url, url, null, null, null, null, stable);
        }
 
        /**
@@ -117,8 +173,41 @@ public class Downloader {
        public InputStream open(URL url, URL currentReferer,
                        Map<String, String> cookiesValues, Map<String, String> postParams,
                        Map<String, String> getParams, String oauth) throws IOException {
+               return open(url, currentReferer, cookiesValues, postParams, getParams,
+                               oauth, false);
+       }
+
+       /**
+        * Open the given {@link URL} and update the cookies.
+        * 
+        * @param url
+        *            the {@link URL} to open
+        * @param currentReferer
+        *            the current referer, for websites that need this info
+        * @param cookiesValues
+        *            the cookies
+        * @param postParams
+        *            the POST parameters
+        * @param getParams
+        *            the GET parameters (priority over POST)
+        * @param oauth
+        *            OAuth authorization (aka, "bearer XXXXXXX")
+        * @param stable
+        *            a stable file (that doesn't change too often) --
+        *            parameter used to check if the file is too old to keep or not
+        *            in the cache (default is false)
+        * 
+        * @return the {@link InputStream} of the opened page
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public InputStream open(URL url, URL currentReferer,
+                       Map<String, String> cookiesValues, Map<String, String> postParams,
+                       Map<String, String> getParams, String oauth, boolean stable)
+                       throws IOException {
                return open(url, url, currentReferer, cookiesValues, postParams,
-                               getParams, oauth);
+                               getParams, oauth, stable);
        }
 
        /**
@@ -127,22 +216,44 @@ public class Downloader {
         * @param url
         *            the {@link URL} to open
         * @param originalUrl
-        *            the original {@link URL} before any redirection occurs
+        *            the original {@link URL} before any redirection occurs, which
+        *            is also used for the cache ID if needed (so we can retrieve
+        *            the content with this URL if needed)
+        * @param currentReferer
+        *            the current referer, for websites that need this info
+        * @param cookiesValues
+        *            the cookies
         * @param postParams
         *            the POST parameters
         * @param getParams
         *            the GET parameters (priority over POST)
         * @param oauth
         *            OAuth authorisation (aka, "bearer XXXXXXX")
+        * @param stable
+        *            a stable file (that doesn't change too often) -- parameter
+        *            used to check if the file is too old to keep or not in the
+        *            cache
+        * 
         * @return the {@link InputStream} of the opened page
         * 
         * @throws IOException
         *             in case of I/O error
         */
-       private InputStream open(URL url, final URL originalUrl,
-                       URL currentReferer, Map<String, String> cookiesValues,
-                       Map<String, String> postParams, Map<String, String> getParams,
-                       String oauth) throws IOException {
+       public InputStream open(URL url, final URL originalUrl, URL currentReferer,
+                       Map<String, String> cookiesValues, Map<String, String> postParams,
+                       Map<String, String> getParams, String oauth, boolean stable)
+                       throws IOException {
+
+               tracer.trace("Request: " + url);
+
+               if (cache != null) {
+                       InputStream in = cache.load(originalUrl, false, stable);
+                       if (in != null) {
+                               tracer.trace("Use the cache: " + url);
+                               tracer.trace("Original URL : " + originalUrl);
+                               return in;
+                       }
+               }
 
                tracer.trace("Download: " + url);
 
@@ -186,23 +297,35 @@ public class Downloader {
                        }
 
                        if (requestData != null) {
-                               OutputStreamWriter writer = new OutputStreamWriter(
-                                               conn.getOutputStream());
-
-                               writer.write(requestData.toString());
-                               writer.flush();
-                               writer.close();
+                               OutputStreamWriter writer = null;
+                               try {
+                                       writer = new OutputStreamWriter(conn.getOutputStream());
+                                       writer.write(requestData.toString());
+                                       writer.flush();
+                               } finally {
+                                       if (writer != null) {
+                                               writer.close();
+                                       }
+                               }
                        }
                }
 
                conn.connect();
 
                // Check if redirect
-               if (conn instanceof HttpURLConnection
-                               && ((HttpURLConnection) conn).getResponseCode() / 100 == 3) {
-                       String newUrl = conn.getHeaderField("Location");
-                       return open(new URL(newUrl), originalUrl, currentReferer,
-                                       cookiesValues, postParams, getParams, oauth);
+               if (conn instanceof HttpURLConnection) {
+                       int repCode = 0;
+                       try {
+                               // Can fail in some circumstances
+                               repCode = ((HttpURLConnection) conn).getResponseCode();
+                       } catch (IOException e) {
+                       }
+
+                       if (repCode / 100 == 3) {
+                               String newUrl = conn.getHeaderField("Location");
+                               return open(new URL(newUrl), originalUrl, currentReferer,
+                                               cookiesValues, postParams, getParams, oauth, stable);
+                       }
                }
 
                InputStream in = conn.getInputStream();
@@ -210,6 +333,19 @@ public class Downloader {
                        in = new GZIPInputStream(in);
                }
 
+               if (in != null && cache != null) {
+                       tracer.trace("Save to cache: " + originalUrl);
+                       try {
+                               cache.save(in, originalUrl);
+                               in.close();
+                               in = cache.load(originalUrl, true, false);
+                       } catch (IOException e) {
+                               tracer.error(new IOException(
+                                               "Cannot save URL to cache, will ignore cache: " + url,
+                                               e));
+                       }
+               }
+
                return in;
        }
 
@@ -254,7 +390,6 @@ public class Downloader {
                                builder.append(';');
                        }
 
-                       // TODO: check if format is ok
                        builder.append(cookie.toString());
                }