better cache
[nikiroo-utils.git] / src / be / nikiroo / utils / Downloader.java
index 67fd652b306323243365271752eb528f3e6523f2..efc6a98df54f11a91c271646daf427d7cfca6fdf 100644 (file)
@@ -28,6 +28,8 @@ import java.util.zip.GZIPInputStream;
 public class Downloader {
        private String UA;
        private CookieManager cookies;
+       private TraceHandler tracer = new TraceHandler();
+       private Cache cache;
 
        /**
         * Create a new {@link Downloader}.
@@ -39,11 +41,70 @@ public class Downloader {
         *            only (!)
         */
        public Downloader(String UA) {
+               this(UA, null);
+       }
+
+       /**
+        * Create a new {@link Downloader}.
+        * 
+        * @param UA
+        *            the User-Agent to use to download the resources -- note that
+        *            some websites require one, some actively blacklist real UAs
+        *            like the one from wget, some whitelist a couple of browsers
+        *            only (!)
+        * @param cache
+        *            the {@link Cache} to use for all access (can be NULL)
+        */
+       public Downloader(String UA, Cache cache) {
                this.UA = UA;
 
                cookies = new CookieManager();
                cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
                CookieHandler.setDefault(cookies);
+
+               this.cache = cache;
+       }
+
+       /**
+        * The traces handler for this {@link Downloader}.
+        * 
+        * @return the traces handler
+        */
+       public TraceHandler getTraceHandler() {
+               return tracer;
+       }
+
+       /**
+        * The traces handler for this {@link Downloader}.
+        * 
+        * @param tracer
+        *            the new traces handler
+        */
+       public void setTraceHandler(TraceHandler tracer) {
+               if (tracer == null) {
+                       tracer = new TraceHandler(false, false, false);
+               }
+
+               this.tracer = tracer;
+       }
+
+       /**
+        * The {@link Cache} to use for all access (can be NULL).
+        * 
+        * @return the cache
+        */
+       public Cache getCache() {
+               return cache;
+       }
+
+       /**
+        * The {@link Cache} to use for all access (can be NULL).
+        * 
+        * @param cache
+        *            the new cache
+        */
+       public void setCache(Cache cache) {
+               this.cache = cache;
        }
 
        /**
@@ -66,7 +127,7 @@ public class Downloader {
         *             in case of I/O error
         **/
        public InputStream open(URL url) throws IOException {
-               return open(url, url, url, null, null, null, null);
+               return open(url, false);
        }
 
        /**
@@ -74,6 +135,29 @@ public class Downloader {
         * 
         * @param url
         *            the {@link URL} to open
+        * @param stable
+        *            a stable file (that doesn't change too often) --
+        *            parameter used to check if the file is too old to keep or not
+        *            in the cache (default is false)
+        * 
+        * @return the {@link InputStream} of the opened page
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        **/
+       public InputStream open(URL url, boolean stable) throws IOException {
+               return open(url, url, url, null, null, null, null, stable);
+       }
+
+       /**
+        * Open the given {@link URL} and update the cookies.
+        * 
+        * @param url
+        *            the {@link URL} to open
+        * @param currentReferer
+        *            the current referer, for websites that need this info
+        * @param cookiesValues
+        *            the cookies
         * @param postParams
         *            the POST parameters
         * @param getParams
@@ -89,26 +173,41 @@ public class Downloader {
        public InputStream open(URL url, URL currentReferer,
                        Map<String, String> cookiesValues, Map<String, String> postParams,
                        Map<String, String> getParams, String oauth) throws IOException {
-               return open(url, url, currentReferer, cookiesValues, postParams,
-                               getParams, oauth);
+               return open(url, currentReferer, cookiesValues, postParams, getParams,
+                               oauth, false);
        }
 
        /**
-        * Trace information (info/error) generated by this class.
-        * <p>
-        * You can override it if you don't want the default sysout/syserr.
+        * Open the given {@link URL} and update the cookies.
         * 
-        * @param message
-        *            the message
-        * @param error
-        *            TRUE for error messages, FALSE for information messages
+        * @param url
+        *            the {@link URL} to open
+        * @param currentReferer
+        *            the current referer, for websites that need this info
+        * @param cookiesValues
+        *            the cookies
+        * @param postParams
+        *            the POST parameters
+        * @param getParams
+        *            the GET parameters (priority over POST)
+        * @param oauth
+        *            OAuth authorization (aka, "bearer XXXXXXX")
+        * @param stable
+        *            a stable file (that doesn't change too often) --
+        *            parameter used to check if the file is too old to keep or not
+        *            in the cache (default is false)
+        * 
+        * @return the {@link InputStream} of the opened page
+        * 
+        * @throws IOException
+        *             in case of I/O error
         */
-       protected void trace(String message, boolean error) {
-               if (error) {
-                       System.err.println(message);
-               } else {
-                       System.out.println(message);
-               }
+       public InputStream open(URL url, URL currentReferer,
+                       Map<String, String> cookiesValues, Map<String, String> postParams,
+                       Map<String, String> getParams, String oauth, boolean stable)
+                       throws IOException {
+               return open(url, url, currentReferer, cookiesValues, postParams,
+                               getParams, oauth, stable);
        }
 
        /**
@@ -117,24 +216,46 @@ public class Downloader {
         * @param url
         *            the {@link URL} to open
         * @param originalUrl
-        *            the original {@link URL} before any redirection occurs
+        *            the original {@link URL} before any redirection occurs, which
+        *            is also used for the cache ID if needed (so we can retrieve
+        *            the content with this URL if needed)
+        * @param currentReferer
+        *            the current referer, for websites that need this info
+        * @param cookiesValues
+        *            the cookies
         * @param postParams
         *            the POST parameters
         * @param getParams
         *            the GET parameters (priority over POST)
         * @param oauth
         *            OAuth authorisation (aka, "bearer XXXXXXX")
+        * @param stable
+        *            a stable file (that doesn't change too often) -- parameter
+        *            used to check if the file is too old to keep or not in the
+        *            cache
+        * 
         * @return the {@link InputStream} of the opened page
         * 
         * @throws IOException
         *             in case of I/O error
         */
-       private InputStream open(URL url, final URL originalUrl,
-                       URL currentReferer, Map<String, String> cookiesValues,
-                       Map<String, String> postParams, Map<String, String> getParams,
-                       String oauth) throws IOException {
+       public InputStream open(URL url, final URL originalUrl, URL currentReferer,
+                       Map<String, String> cookiesValues, Map<String, String> postParams,
+                       Map<String, String> getParams, String oauth, boolean stable)
+                       throws IOException {
+
+               tracer.trace("Request: " + url);
+
+               if (cache != null) {
+                       InputStream in = cache.load(originalUrl, false, stable);
+                       if (in != null) {
+                               tracer.trace("Use the cache: " + url);
+                               tracer.trace("Original URL : " + originalUrl);
+                               return in;
+                       }
+               }
 
-               trace("Download: " + url, false);
+               tracer.trace("Download: " + url);
 
                URLConnection conn = openConnectionWithCookies(url, currentReferer,
                                cookiesValues);
@@ -176,23 +297,35 @@ public class Downloader {
                        }
 
                        if (requestData != null) {
-                               OutputStreamWriter writer = new OutputStreamWriter(
-                                               conn.getOutputStream());
-
-                               writer.write(requestData.toString());
-                               writer.flush();
-                               writer.close();
+                               OutputStreamWriter writer = null;
+                               try {
+                                       writer = new OutputStreamWriter(conn.getOutputStream());
+                                       writer.write(requestData.toString());
+                                       writer.flush();
+                               } finally {
+                                       if (writer != null) {
+                                               writer.close();
+                                       }
+                               }
                        }
                }
 
                conn.connect();
 
                // Check if redirect
-               if (conn instanceof HttpURLConnection
-                               && ((HttpURLConnection) conn).getResponseCode() / 100 == 3) {
-                       String newUrl = conn.getHeaderField("Location");
-                       return open(new URL(newUrl), originalUrl, currentReferer,
-                                       cookiesValues, postParams, getParams, oauth);
+               if (conn instanceof HttpURLConnection) {
+                       int repCode = 0;
+                       try {
+                               // Can fail in some circumstances
+                               repCode = ((HttpURLConnection) conn).getResponseCode();
+                       } catch (IOException e) {
+                       }
+
+                       if (repCode / 100 == 3) {
+                               String newUrl = conn.getHeaderField("Location");
+                               return open(new URL(newUrl), originalUrl, currentReferer,
+                                               cookiesValues, postParams, getParams, oauth, stable);
+                       }
                }
 
                InputStream in = conn.getInputStream();
@@ -200,6 +333,17 @@ public class Downloader {
                        in = new GZIPInputStream(in);
                }
 
+               if (in != null && cache != null) {
+                       tracer.trace("Save to cache: " + url);
+                       try {
+                               cache.save(in, url);
+                       } catch (IOException e) {
+                               tracer.error(new IOException(
+                                               "Cannot save URL to cache, will ignore cache: " + url,
+                                               e));
+                       }
+               }
+
                return in;
        }
 
@@ -244,7 +388,6 @@ public class Downloader {
                                builder.append(';');
                        }
 
-                       // TODO: check if format is ok
                        builder.append(cookie.toString());
                }