From f6e8d60dbb9f124046f1b951315d74f003624f09 Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Fri, 22 Jun 2018 17:00:53 +0000 Subject: [PATCH] Improve Downloader and Cache: - Downloader: add an optional cache - Cache: auto-clean when saving --- src/be/nikiroo/utils/Cache.java | 8 ++- src/be/nikiroo/utils/Downloader.java | 104 +++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 6 deletions(-) diff --git a/src/be/nikiroo/utils/Cache.java b/src/be/nikiroo/utils/Cache.java index 13a50ea..4750ef8 100644 --- a/src/be/nikiroo/utils/Cache.java +++ b/src/be/nikiroo/utils/Cache.java @@ -220,8 +220,9 @@ public class Cache { * @param allowTooOld * allow files even if they are considered too old * @param stable - * a stable file (that dones't change too often) -- parameter - * used to check if the file is too old to keep or not + * a stable file (that doesn't change too often) -- parameter + * used to check if the file is too old to keep or not in the + * cache * * @return the opened resource if found, NULL if not */ @@ -290,6 +291,8 @@ public class Cache { /** * Save the given resource to the cache. + *

+ * Will also clean the {@link Cache} from old files. * * @param in * the input data @@ -300,6 +303,7 @@ public class Cache { * in case of I/O error */ private void save(InputStream in, File cached) throws IOException { + clean(true); IOUtils.write(in, cached); } diff --git a/src/be/nikiroo/utils/Downloader.java b/src/be/nikiroo/utils/Downloader.java index 2919ff0..651abc3 100644 --- a/src/be/nikiroo/utils/Downloader.java +++ b/src/be/nikiroo/utils/Downloader.java @@ -29,6 +29,7 @@ public class Downloader { private String UA; private CookieManager cookies; private TraceHandler tracer = new TraceHandler(); + private Cache cache; /** * Create a new {@link Downloader}. @@ -40,11 +41,28 @@ public class Downloader { * only (!) */ public Downloader(String UA) { + this(UA, null); + } + + /** + * Create a new {@link Downloader}. + * + * @param UA + * the User-Agent to use to download the resources -- note that + * some websites require one, some actively blacklist real UAs + * like the one from wget, some whitelist a couple of browsers + * only (!) + * @param cache + * the {@link Cache} to use for all access (can be NULL) + */ + public Downloader(String UA, Cache cache) { this.UA = UA; cookies = new CookieManager(); cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL); CookieHandler.setDefault(cookies); + + this.cache = cache; } /** @@ -90,7 +108,26 @@ public class Downloader { * in case of I/O error **/ public InputStream open(URL url) throws IOException { - return open(url, url, url, null, null, null, null); + return open(url, false); + } + + /** + * Open the given {@link URL} and update the cookies. 
+ * + * @param url + * the {@link URL} to open + * @param stable + * a stable file (that doesn't change too often) -- + * parameter used to check if the file is too old to keep or not + * in the cache (default is false) + * + * @return the {@link InputStream} of the opened page + * + * @throws IOException + * in case of I/O error + **/ + public InputStream open(URL url, boolean stable) throws IOException { + return open(url, url, url, null, null, null, null, stable); } /** @@ -117,8 +154,41 @@ public class Downloader { public InputStream open(URL url, URL currentReferer, Map cookiesValues, Map postParams, Map getParams, String oauth) throws IOException { + return open(url, currentReferer, cookiesValues, postParams, getParams, + oauth, false); + } + + /** + * Open the given {@link URL} and update the cookies. + * + * @param url + * the {@link URL} to open + * @param currentReferer + * the current referer, for websites that need this info + * @param cookiesValues + * the cookies + * @param postParams + * the POST parameters + * @param getParams + * the GET parameters (priority over POST) + * @param oauth + * OAuth authorization (aka, "bearer XXXXXXX") + * @param stable + * a stable file (that doesn't change too often) -- + * parameter used to check if the file is too old to keep or not + * in the cache (default is false) + * + * @return the {@link InputStream} of the opened page + * + * @throws IOException + * in case of I/O error + */ + public InputStream open(URL url, URL currentReferer, + Map cookiesValues, Map postParams, + Map getParams, String oauth, boolean stable) + throws IOException { return open(url, url, currentReferer, cookiesValues, postParams, - getParams, oauth); + getParams, oauth, stable); } /** @@ -134,6 +204,11 @@ public class Downloader { * the GET parameters (priority over POST) * @param oauth * OAuth authorisation (aka, "bearer XXXXXXX") + * @param stable + * a stable file (that doesn't change too often) -- parameter + * used to 
check if the file is too old to keep or not in the + * cache + * * @return the {@link InputStream} of the opened page * * @throws IOException @@ -142,7 +217,17 @@ public class Downloader { private InputStream open(URL url, final URL originalUrl, URL currentReferer, Map cookiesValues, Map postParams, Map getParams, - String oauth) throws IOException { + String oauth, boolean stable) throws IOException { + + tracer.trace("Request: " + url); + + if (cache != null) { + InputStream in = cache.load(url, false, stable); + if (in != null) { + tracer.trace("Take from cache: " + url); + return in; + } + } tracer.trace("Download: " + url); @@ -213,7 +298,7 @@ public class Downloader { if (repCode / 100 == 3) { String newUrl = conn.getHeaderField("Location"); return open(new URL(newUrl), originalUrl, currentReferer, - cookiesValues, postParams, getParams, oauth); + cookiesValues, postParams, getParams, oauth, stable); } } @@ -222,6 +307,24 @@ public class Downloader { in = new GZIPInputStream(in); } + if (in != null && cache != null) { + tracer.trace("Save to cache: " + url); + try { + try { + cache.save(in, url); + } finally { + /* saving reads the network stream, so it cannot be handed to the caller any more */ + in.close(); + } + /* serve the content from the cache entry just written; allowTooOld because it is brand new */ + in = cache.load(url, true, stable); + } catch (IOException e) { + tracer.error(new IOException( + "Cannot save URL to cache, will ignore cache: " + url, + e)); + } + } + return in; } -- 2.27.0