X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Futils%2FDownloader.java;fp=src%2Fbe%2Fnikiroo%2Futils%2FDownloader.java;h=67fd652b306323243365271752eb528f3e6523f2;hb=8816d2f781492532ecdbdee8241f53017b44daba;hp=0000000000000000000000000000000000000000;hpb=3f8349b761d29cd3d5381f076d706014574eeb43;p=fanfix.git diff --git a/src/be/nikiroo/utils/Downloader.java b/src/be/nikiroo/utils/Downloader.java new file mode 100644 index 0000000..67fd652 --- /dev/null +++ b/src/be/nikiroo/utils/Downloader.java @@ -0,0 +1,264 @@ +package be.nikiroo.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.net.CookieHandler; +import java.net.CookieManager; +import java.net.CookiePolicy; +import java.net.CookieStore; +import java.net.HttpCookie; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLConnection; +import java.net.URLEncoder; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +/** + * This class will help you download content from Internet Sites ({@link URL} + * based). + *

+ * It allows you to control some options often required on web sites that do not + * want to simply serve HTML, but actively makes your life difficult with stupid + * checks. + * + * @author niki + */ +public class Downloader { + private String UA; + private CookieManager cookies; + + /** + * Create a new {@link Downloader}. + * + * @param UA + * the User-Agent to use to download the resources -- note that + * some websites require one, some actively blacklist real UAs + * like the one from wget, some whitelist a couple of browsers + * only (!) + */ + public Downloader(String UA) { + this.UA = UA; + + cookies = new CookieManager(); + cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL); + CookieHandler.setDefault(cookies); + } + + /** + * Clear all the cookies currently in the jar. + *

+ * As long as you don't, the cookies are kept. + */ + public void clearCookies() { + cookies.getCookieStore().removeAll(); + } + + /** + * Open the given {@link URL} and update the cookies. + * + * @param url + * the {@link URL} to open + * @return the {@link InputStream} of the opened page + * + * @throws IOException + * in case of I/O error + **/ + public InputStream open(URL url) throws IOException { + return open(url, url, url, null, null, null, null); + } + + /** + * Open the given {@link URL} and update the cookies. + * + * @param url + * the {@link URL} to open + * @param postParams + * the POST parameters + * @param getParams + * the GET parameters (priority over POST) + * @param oauth + * OAuth authorization (aka, "bearer XXXXXXX") + * + * @return the {@link InputStream} of the opened page + * + * @throws IOException + * in case of I/O error + */ + public InputStream open(URL url, URL currentReferer, + Map cookiesValues, Map postParams, + Map getParams, String oauth) throws IOException { + return open(url, url, currentReferer, cookiesValues, postParams, + getParams, oauth); + } + + /** + * Trace information (info/error) generated by this class. + *

+ * You can override it if you don't want the default sysout/syserr. + * + * @param message + * the message + * @param error + * TRUE for error messages, FALSE for information messages + */ + protected void trace(String message, boolean error) { + if (error) { + System.err.println(message); + } else { + System.out.println(message); + } + } + + /** + * Open the given {@link URL} and update the cookies. + * + * @param url + * the {@link URL} to open + * @param originalUrl + * the original {@link URL} before any redirection occurs + * @param postParams + * the POST parameters + * @param getParams + * the GET parameters (priority over POST) + * @param oauth + * OAuth authorisation (aka, "bearer XXXXXXX") + * @return the {@link InputStream} of the opened page + * + * @throws IOException + * in case of I/O error + */ + private InputStream open(URL url, final URL originalUrl, + URL currentReferer, Map cookiesValues, + Map postParams, Map getParams, + String oauth) throws IOException { + + trace("Download: " + url, false); + + URLConnection conn = openConnectionWithCookies(url, currentReferer, + cookiesValues); + + // Priority: GET over POST + Map params = getParams; + if (getParams == null) { + params = postParams; + } + + if ((params != null || oauth != null) + && conn instanceof HttpURLConnection) { + StringBuilder requestData = null; + if (params != null) { + requestData = new StringBuilder(); + for (Map.Entry param : params.entrySet()) { + if (requestData.length() != 0) + requestData.append('&'); + requestData.append(URLEncoder.encode(param.getKey(), + "UTF-8")); + requestData.append('='); + requestData.append(URLEncoder.encode( + String.valueOf(param.getValue()), "UTF-8")); + } + + conn.setDoOutput(true); + + if (getParams == null && postParams != null) { + ((HttpURLConnection) conn).setRequestMethod("POST"); + } + + conn.setRequestProperty("Content-Type", + "application/x-www-form-urlencoded"); + conn.setRequestProperty("charset", "utf-8"); + } + + if (oauth != null) { + conn.setRequestProperty("Authorization", oauth); + } + + if (requestData != null) { + OutputStreamWriter writer = new OutputStreamWriter( + conn.getOutputStream()); + + writer.write(requestData.toString()); + writer.flush(); + writer.close(); + } + } + + conn.connect(); + + // Check if redirect + if (conn instanceof HttpURLConnection + && ((HttpURLConnection) conn).getResponseCode() / 100 == 3) { + String newUrl = conn.getHeaderField("Location"); + return open(new URL(newUrl), originalUrl, currentReferer, + cookiesValues, postParams, getParams, oauth); + } + + InputStream in = conn.getInputStream(); + if ("gzip".equals(conn.getContentEncoding())) { + in = new GZIPInputStream(in); + } + + return in; + } + + /** + * Open a connection on the given {@link URL}, and manage the cookies that + * come with it. + * + * @param url + * the {@link URL} to open + * + * @return the connection + * + * @throws IOException + * in case of I/O error + */ + private URLConnection openConnectionWithCookies(URL url, + URL currentReferer, Map cookiesValues) + throws IOException { + URLConnection conn = url.openConnection(); + + conn.setRequestProperty("User-Agent", UA); + conn.setRequestProperty("Cookie", generateCookies(cookiesValues)); + conn.setRequestProperty("Accept-Encoding", "gzip"); + if (currentReferer != null) { + conn.setRequestProperty("Referer", currentReferer.toString()); + conn.setRequestProperty("Host", currentReferer.getHost()); + } + + return conn; + } + + /** + * Generate the cookie {@link String} from the local {@link CookieStore} so + * it is ready to be passed. + * + * @return the cookie + */ + private String generateCookies(Map cookiesValues) { + StringBuilder builder = new StringBuilder(); + for (HttpCookie cookie : cookies.getCookieStore().getCookies()) { + if (builder.length() > 0) { + builder.append(';'); + } + + // TODO: check if format is ok + builder.append(cookie.toString()); + } + + if (cookiesValues != null) { + for (Map.Entry set : cookiesValues.entrySet()) { + if (builder.length() > 0) { + builder.append(';'); + } + builder.append(set.getKey()); + builder.append('='); + builder.append(set.getValue()); + } + } + + return builder.toString(); + } +}