X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Futils%2FDownloader.java;h=4191d0aea0da85511e2741c50c2a6c83806f612f;hb=53c2b6a134b08402e1daf3e4c84b9b888de9cc9c;hp=0487933295ec8c4902665d2bffcd81e57debda2c;hpb=67b8ce74c4277c894aba234a7a493a93f6225a69;p=nikiroo-utils.git
diff --git a/src/be/nikiroo/utils/Downloader.java b/src/be/nikiroo/utils/Downloader.java
deleted file mode 100644
index 0487933..0000000
--- a/src/be/nikiroo/utils/Downloader.java
+++ /dev/null
@@ -1,478 +0,0 @@
-package be.nikiroo.utils;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStreamWriter;
-import java.net.CookieHandler;
-import java.net.CookieManager;
-import java.net.CookiePolicy;
-import java.net.CookieStore;
-import java.net.HttpCookie;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.net.URLConnection;
-import java.net.URLEncoder;
-import java.util.Map;
-import java.util.zip.GZIPInputStream;
-
-/**
- * This class will help you download content from Internet Sites ({@link URL}
- * based).
- *
- * It allows you to control some options often required on web sites that do not
- * want to simply serve HTML, but actively makes your life difficult with stupid
- * checks.
- *
- * @author niki
- */
-public class Downloader {
- private String UA;
- private CookieManager cookies;
- private TraceHandler tracer = new TraceHandler();
- private Cache cache;
- private boolean offline;
-
- /**
- * Create a new {@link Downloader}.
- *
- * @param UA
- * the User-Agent to use to download the resources -- note that
- * some websites require one, some actively blacklist real UAs
- * like the one from wget, some whitelist a couple of browsers
- * only (!)
- */
- public Downloader(String UA) {
- this(UA, null);
- }
-
- /**
- * Create a new {@link Downloader}.
- *
- * @param UA
- * the User-Agent to use to download the resources -- note that
- * some websites require one, some actively blacklist real UAs
- * like the one from wget, some whitelist a couple of browsers
- * only (!)
- * @param cache
- * the {@link Cache} to use for all access (can be NULL)
- */
- public Downloader(String UA, Cache cache) {
- this.UA = UA;
-
- cookies = new CookieManager(null, CookiePolicy.ACCEPT_ALL);
- CookieHandler.setDefault(cookies);
-
- setCache(cache);
- }
-
- /**
- * This {@link Downloader} is forbidden to try and connect to the network.
- *
- * If TRUE, it will only check the cache if any.
- *
- * Default is FALSE.
- *
- * @return TRUE if offline
- */
- public boolean isOffline() {
- return offline;
- }
-
- /**
- * This {@link Downloader} is forbidden to try and connect to the network.
- *
- * If TRUE, it will only check the cache if any.
- *
- * Default is FALSE.
- *
- * @param offline TRUE for offline, FALSE for online
- */
- public void setOffline(boolean offline) {
- this.offline = offline;
- }
-
- /**
- * The traces handler for this {@link Cache}.
- *
- * @return the traces handler
- */
- public TraceHandler getTraceHandler() {
- return tracer;
- }
-
- /**
- * The traces handler for this {@link Cache}.
- *
- * @param tracer
- * the new traces handler
- */
- public void setTraceHandler(TraceHandler tracer) {
- if (tracer == null) {
- tracer = new TraceHandler(false, false, false);
- }
-
- this.tracer = tracer;
- }
-
- /**
- * The {@link Cache} to use for all access (can be NULL).
- *
- * @return the cache
- */
- public Cache getCache() {
- return cache;
- }
-
- /**
- * The {@link Cache} to use for all access (can be NULL).
- *
- * @param cache
- * the new cache
- */
- public void setCache(Cache cache) {
- this.cache = cache;
- }
-
- /**
- * Clear all the cookies currently in the jar.
- *
- * As long as you don't, the cookies are kept.
- */
- public void clearCookies() {
- cookies.getCookieStore().removeAll();
- }
-
- /**
- * Open the given {@link URL} and update the cookies.
- *
- * @param url
- * the {@link URL} to open
- * @return the {@link InputStream} of the opened page
- *
- * @throws IOException
- * in case of I/O error
- **/
- public InputStream open(URL url) throws IOException {
- return open(url, false);
- }
-
- /**
- * Open the given {@link URL} and update the cookies.
- *
- * @param url
- * the {@link URL} to open
- * @param stable
- * stable a stable file (that doesn't change too often) --
- * parameter used to check if the file is too old to keep or not
- * in the cache (default is false)
- *
- * @return the {@link InputStream} of the opened page
- *
- * @throws IOException
- * in case of I/O error
- **/
- public InputStream open(URL url, boolean stable) throws IOException {
- return open(url, url, url, null, null, null, null, stable);
- }
-
- /**
- * Open the given {@link URL} and update the cookies.
- *
- * @param url
- * the {@link URL} to open
- * @param currentReferer
- * the current referer, for websites that needs this info
- * @param cookiesValues
- * the cookies
- * @param postParams
- * the POST parameters
- * @param getParams
- * the GET parameters (priority over POST)
- * @param oauth
- * OAuth authorization (aka, "bearer XXXXXXX")
- *
- * @return the {@link InputStream} of the opened page
- *
- * @throws IOException
- * in case of I/O error (including offline mode + not in cache)
- */
- public InputStream open(URL url, URL currentReferer,
- Map<String, String> cookiesValues, Map<String, String> postParams,
- Map<String, String> getParams, String oauth) throws IOException {
- return open(url, currentReferer, cookiesValues, postParams, getParams,
- oauth, false);
- }
-
- /**
- * Open the given {@link URL} and update the cookies.
- *
- * @param url
- * the {@link URL} to open
- * @param currentReferer
- * the current referer, for websites that needs this info
- * @param cookiesValues
- * the cookies
- * @param postParams
- * the POST parameters
- * @param getParams
- * the GET parameters (priority over POST)
- * @param oauth
- * OAuth authorization (aka, "bearer XXXXXXX")
- * @param stable
- * stable a stable file (that doesn't change too often) --
- * parameter used to check if the file is too old to keep or not
- * in the cache (default is false)
- *
- * @return the {@link InputStream} of the opened page
- *
- * @throws IOException
- * in case of I/O error (including offline mode + not in cache)
- */
- public InputStream open(URL url, URL currentReferer,
- Map<String, String> cookiesValues, Map<String, String> postParams,
- Map<String, String> getParams, String oauth, boolean stable)
- throws IOException {
- return open(url, url, currentReferer, cookiesValues, postParams,
- getParams, oauth, stable);
- }
-
- /**
- * Open the given {@link URL} and update the cookies.
- *
- * @param url
- * the {@link URL} to open
- * @param originalUrl
- * the original {@link URL} before any redirection occurs, which
- * is also used for the cache ID if needed (so we can retrieve
- * the content with this URL if needed)
- * @param currentReferer
- * the current referer, for websites that needs this info
- * @param cookiesValues
- * the cookies
- * @param postParams
- * the POST parameters
- * @param getParams
- * the GET parameters (priority over POST)
- * @param oauth
- * OAuth authorisation (aka, "bearer XXXXXXX")
- * @param stable
- * a stable file (that doesn't change too often) -- parameter
- * used to check if the file is too old to keep or not in the
- * cache
- *
- * @return the {@link InputStream} of the opened page
- *
- * @throws IOException
- * in case of I/O error (including offline mode + not in cache)
- */
- public InputStream open(URL url, final URL originalUrl, URL currentReferer,
- Map<String, String> cookiesValues, Map<String, String> postParams,
- Map<String, String> getParams, String oauth, boolean stable)
- throws IOException {
-
- tracer.trace("Request: " + url);
-
- if (cache != null) {
- InputStream in = cache.load(originalUrl, false, stable);
- if (in != null) {
- tracer.trace("Use the cache: " + url);
- tracer.trace("Original URL : " + originalUrl);
- return in;
- }
- }
-
- String protocol = originalUrl == null ? null : originalUrl
- .getProtocol();
- if (isOffline() && !"file".equalsIgnoreCase(protocol)) {
- tracer.error("Downloader OFFLINE, cannot proceed to URL: " + url);
- throw new IOException("Downloader is currently OFFLINE, cannot download: " + url);
- }
-
- tracer.trace("Download: " + url);
-
- URLConnection conn = openConnectionWithCookies(url, currentReferer,
- cookiesValues);
-
- // Priority: GET over POST
- Map<String, String> params = getParams;
- if (getParams == null) {
- params = postParams;
- }
-
- StringBuilder requestData = null;
- if ((params != null || oauth != null)
- && conn instanceof HttpURLConnection) {
- if (params != null) {
- requestData = new StringBuilder();
- for (Map.Entry<String, String> param : params.entrySet()) {
- if (requestData.length() != 0)
- requestData.append('&');
- requestData.append(URLEncoder.encode(param.getKey(),
- "UTF-8"));
- requestData.append('=');
- requestData.append(URLEncoder.encode(
- String.valueOf(param.getValue()), "UTF-8"));
- }
-
- if (getParams == null && postParams != null) {
- ((HttpURLConnection) conn).setRequestMethod("POST");
- }
-
- conn.setRequestProperty("Content-Type",
- "application/x-www-form-urlencoded");
- conn.setRequestProperty("Content-Length",
- Integer.toString(requestData.length()));
- }
-
- if (oauth != null) {
- conn.setRequestProperty("Authorization", oauth);
- }
-
- if (requestData != null) {
- conn.setDoOutput(true);
- OutputStreamWriter writer = new OutputStreamWriter(
- conn.getOutputStream());
- try {
- writer.write(requestData.toString());
- writer.flush();
- } finally {
- writer.close();
- }
- }
- }
-
- // Manual redirection, much better for POST data
- if (conn instanceof HttpURLConnection) {
- ((HttpURLConnection) conn).setInstanceFollowRedirects(false);
- }
-
- conn.connect();
-
- // Check if redirect
- // BEWARE! POST data cannot be redirected (some webservers complain) for
- // HTTP codes 302 and 303
- if (conn instanceof HttpURLConnection) {
- int repCode = 0;
- try {
- // Can fail in some circumstances
- repCode = ((HttpURLConnection) conn).getResponseCode();
- } catch (IOException e) {
- }
-
- if (repCode / 100 == 3) {
- String newUrl = conn.getHeaderField("Location");
- return open(new URL(newUrl), originalUrl, currentReferer,
- cookiesValues, //
- (repCode == 302 || repCode == 303) ? null : postParams, //
- getParams, oauth, stable);
- }
- }
-
- try {
- InputStream in = conn.getInputStream();
- if ("gzip".equals(conn.getContentEncoding())) {
- in = new GZIPInputStream(in);
- }
-
- if (in == null) {
- throw new IOException("No InputStream!");
- }
-
- if (cache != null) {
- String size = conn.getContentLength() < 0 ? "unknown size"
- : StringUtils.formatNumber(conn.getContentLength())
- + "bytes";
- tracer.trace("Save to cache (" + size + "): " + originalUrl);
- try {
- try {
- long bytes = cache.save(in, originalUrl);
- tracer.trace("Saved to cache: "
- + StringUtils.formatNumber(bytes) + "bytes");
- } finally {
- in.close();
- }
- in = cache.load(originalUrl, true, true);
- } catch (IOException e) {
- tracer.error(new IOException(
- "Cannot save URL to cache, will ignore cache: "
- + url, e));
- }
- }
-
- if (in == null) {
- throw new IOException(
- "Cannot retrieve the file after storing it in the cache (??)");
- }
-
- return in;
- } catch (IOException e) {
- throw new IOException(String.format(
- "Cannot find %s (current URL: %s)", originalUrl, url), e);
- }
- }
-
- /**
- * Open a connection on the given {@link URL}, and manage the cookies that
- * come with it.
- *
- * @param url
- * the {@link URL} to open
- *
- * @return the connection
- *
- * @throws IOException
- * in case of I/O error
- */
- private URLConnection openConnectionWithCookies(URL url,
- URL currentReferer, Map<String, String> cookiesValues)
- throws IOException {
- URLConnection conn = url.openConnection();
-
- String cookies = generateCookies(cookiesValues);
- if (cookies != null && !cookies.isEmpty()) {
- conn.setRequestProperty("Cookie", cookies);
- }
-
- conn.setRequestProperty("User-Agent", UA);
- conn.setRequestProperty("Accept-Encoding", "gzip");
- conn.setRequestProperty("Accept", "*/*");
- conn.setRequestProperty("Charset", "utf-8");
-
- if (currentReferer != null) {
- conn.setRequestProperty("Referer", currentReferer.toString());
- conn.setRequestProperty("Host", currentReferer.getHost());
- }
-
- return conn;
- }
-
- /**
- * Generate the cookie {@link String} from the local {@link CookieStore} so
- * it is ready to be passed.
- *
- * @return the cookie
- */
- private String generateCookies(Map<String, String> cookiesValues) {
- StringBuilder builder = new StringBuilder();
- for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
- if (builder.length() > 0) {
- builder.append(';');
- }
-
- builder.append(cookie.toString());
- }
-
- if (cookiesValues != null) {
- for (Map.Entry<String, String> set : cookiesValues.entrySet()) {
- if (builder.length() > 0) {
- builder.append(';');
- }
- builder.append(set.getKey());
- builder.append('=');
- builder.append(set.getValue());
- }
- }
-
- return builder.toString();
- }
-}