X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Futils%2FDownloader.java;h=30ce4e798aff5300b30e14f200546b1e4d09ef16;hb=340e6065e8027c2b3b88549b5761b0b9f6950a53;hp=2919ff01bf551d2d99eca89883c42a1c2e744f51;hpb=6149689f27e74830cf2638c8ceffbe0dca9b82f0;p=fanfix.git
diff --git a/src/be/nikiroo/utils/Downloader.java b/src/be/nikiroo/utils/Downloader.java
index 2919ff0..30ce4e7 100644
--- a/src/be/nikiroo/utils/Downloader.java
+++ b/src/be/nikiroo/utils/Downloader.java
@@ -29,6 +29,7 @@ public class Downloader {
private String UA;
private CookieManager cookies;
private TraceHandler tracer = new TraceHandler();
+ private Cache cache;
/**
* Create a new {@link Downloader}.
@@ -40,11 +41,28 @@ public class Downloader {
* only (!)
*/
public Downloader(String UA) {
+ this(UA, null);
+ }
+
+ /**
+ * Create a new {@link Downloader}.
+ *
+ * @param UA
+ * the User-Agent to use to download the resources -- note that
+ * some websites require one, some actively blacklist real UAs
+ * like the one from wget, some whitelist a couple of browsers
+ * only (!)
+ * @param cache
+ * the {@link Cache} to use for all access (can be NULL)
+ */
+ public Downloader(String UA, Cache cache) {
this.UA = UA;
cookies = new CookieManager();
cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
CookieHandler.setDefault(cookies);
+
+ this.cache = cache;
}
/**
@@ -70,6 +88,25 @@ public class Downloader {
this.tracer = tracer;
}
+ /**
+ * The {@link Cache} to use for all access (can be NULL).
+ *
+ * @return the cache
+ */
+ public Cache getCache() {
+ return cache;
+ }
+
+ /**
+ * The {@link Cache} to use for all access (can be NULL).
+ *
+ * @param cache
+ * the new cache
+ */
+ public void setCache(Cache cache) {
+ this.cache = cache;
+ }
+
/**
* Clear all the cookies currently in the jar.
*
@@ -90,7 +127,26 @@ public class Downloader {
* in case of I/O error
**/
public InputStream open(URL url) throws IOException {
- return open(url, url, url, null, null, null, null);
+ return open(url, false);
+ }
+
+ /**
+ * Open the given {@link URL} and update the cookies.
+ *
+ * @param url
+ * the {@link URL} to open
+ * @param stable
+ * a stable file (that doesn't change too often) --
+ * parameter used to check if the file is too old to keep or not
+ * in the cache (default is false)
+ *
+ * @return the {@link InputStream} of the opened page
+ *
+ * @throws IOException
+ * in case of I/O error
+ **/
+ public InputStream open(URL url, boolean stable) throws IOException {
+ return open(url, url, url, null, null, null, null, stable);
}
/**
@@ -117,8 +173,41 @@ public class Downloader {
public InputStream open(URL url, URL currentReferer,
Map cookiesValues, Map postParams,
Map getParams, String oauth) throws IOException {
+ return open(url, currentReferer, cookiesValues, postParams, getParams,
+ oauth, false);
+ }
+
+ /**
+ * Open the given {@link URL} and update the cookies.
+ *
+ * @param url
+ * the {@link URL} to open
+ * @param currentReferer
+ * the current referer, for websites that need this info
+ * @param cookiesValues
+ * the cookies
+ * @param postParams
+ * the POST parameters
+ * @param getParams
+ * the GET parameters (priority over POST)
+ * @param oauth
+ * OAuth authorization (aka, "bearer XXXXXXX")
+ * @param stable
+ * a stable file (that doesn't change too often) --
+ * parameter used to check if the file is too old to keep or not
+ * in the cache (default is false)
+ *
+ * @return the {@link InputStream} of the opened page
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ public InputStream open(URL url, URL currentReferer,
+ Map cookiesValues, Map postParams,
+ Map getParams, String oauth, boolean stable)
+ throws IOException {
return open(url, url, currentReferer, cookiesValues, postParams,
- getParams, oauth);
+ getParams, oauth, stable);
}
/**
@@ -127,22 +216,44 @@ public class Downloader {
* @param url
* the {@link URL} to open
* @param originalUrl
- * the original {@link URL} before any redirection occurs
+ * the original {@link URL} before any redirection occurs, which
+ * is also used for the cache ID if needed (so we can retrieve
+ * the content with this URL if needed)
+ * @param currentReferer
+ * the current referer, for websites that need this info
+ * @param cookiesValues
+ * the cookies
* @param postParams
* the POST parameters
* @param getParams
* the GET parameters (priority over POST)
* @param oauth
* OAuth authorisation (aka, "bearer XXXXXXX")
+ * @param stable
+ * a stable file (that doesn't change too often) -- parameter
+ * used to check if the file is too old to keep or not in the
+ * cache
+ *
* @return the {@link InputStream} of the opened page
*
* @throws IOException
* in case of I/O error
*/
- private InputStream open(URL url, final URL originalUrl,
- URL currentReferer, Map cookiesValues,
- Map postParams, Map getParams,
- String oauth) throws IOException {
+ public InputStream open(URL url, final URL originalUrl, URL currentReferer,
+ Map cookiesValues, Map postParams,
+ Map getParams, String oauth, boolean stable)
+ throws IOException {
+
+ tracer.trace("Request: " + url);
+
+ if (cache != null) {
+ InputStream in = cache.load(originalUrl, false, stable);
+ if (in != null) {
+ tracer.trace("Use the cache: " + url);
+ tracer.trace("Original URL : " + originalUrl);
+ return in;
+ }
+ }
tracer.trace("Download: " + url);
@@ -213,7 +324,7 @@ public class Downloader {
if (repCode / 100 == 3) {
String newUrl = conn.getHeaderField("Location");
return open(new URL(newUrl), originalUrl, currentReferer,
- cookiesValues, postParams, getParams, oauth);
+ cookiesValues, postParams, getParams, oauth, stable);
}
}
@@ -222,6 +333,22 @@ public class Downloader {
in = new GZIPInputStream(in);
}
+ if (in != null && cache != null) {
+ tracer.trace("Save to cache: " + originalUrl);
+ try {
+ try {
+ cache.save(in, originalUrl);
+ } finally {
+ in.close();
+ }
+ in = cache.load(originalUrl, true, false);
+ } catch (IOException e) {
+ tracer.error(new IOException(
+ "Cannot save URL to cache, will ignore cache: " + url,
+ e));
+ }
+ }
+
return in;
}
@@ -266,7 +393,6 @@ public class Downloader {
builder.append(';');
}
- // TODO: check if format is ok
builder.append(cookie.toString());
}