X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;ds=inline;f=src%2Fbe%2Fnikiroo%2Futils%2FDownloader.java;fp=src%2Fbe%2Fnikiroo%2Futils%2FDownloader.java;h=67fd652b306323243365271752eb528f3e6523f2;hb=8816d2f781492532ecdbdee8241f53017b44daba;hp=0000000000000000000000000000000000000000;hpb=3f8349b761d29cd3d5381f076d706014574eeb43;p=fanfix.git diff --git a/src/be/nikiroo/utils/Downloader.java b/src/be/nikiroo/utils/Downloader.java new file mode 100644 index 0000000..67fd652 --- /dev/null +++ b/src/be/nikiroo/utils/Downloader.java @@ -0,0 +1,264 @@ +package be.nikiroo.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.net.CookieHandler; +import java.net.CookieManager; +import java.net.CookiePolicy; +import java.net.CookieStore; +import java.net.HttpCookie; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLConnection; +import java.net.URLEncoder; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +/** + * This class will help you download content from Internet Sites ({@link URL} + * based). + *
+ * It allows you to control some options often required on web sites that do not + * want to simply serve HTML, but actively makes your life difficult with stupid + * checks. + * + * @author niki + */ +public class Downloader { + private String UA; + private CookieManager cookies; + + /** + * Create a new {@link Downloader}. + * + * @param UA + * the User-Agent to use to download the resources -- note that + * some websites require one, some actively blacklist real UAs + * like the one from wget, some whitelist a couple of browsers + * only (!) + */ + public Downloader(String UA) { + this.UA = UA; + + cookies = new CookieManager(); + cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL); + CookieHandler.setDefault(cookies); + } + + /** + * Clear all the cookies currently in the jar. + *
+ * As long as you don't, the cookies are kept.
+ */
+ public void clearCookies() {
+     // Empty the store backing this downloader's cookie manager.
+     CookieStore jar = cookies.getCookieStore();
+     jar.removeAll();
+ }
+
+ /**
+ * Open the given {@link URL} and update the cookies.
+ * <p>
+ * The {@link URL} is passed both as the original {@link URL} and as the
+ * current referer; every other argument of the detailed variant is
+ * <tt>null</tt>.
+ *
+ * @param url
+ * the {@link URL} to open
+ * @return the {@link InputStream} of the opened page
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ public InputStream open(URL url) throws IOException {
+     // No redirection happened yet: the URL is its own origin and referer.
+     final URL originalUrl = url;
+     final URL currentReferer = url;
+     return open(url, originalUrl, currentReferer, null, null, null, null);
+ }
+
+ /**
+ * Open the given {@link URL} and update the cookies.
+ *
+ * @param url
+ * the {@link URL} to open
+ * @param currentReferer
+ * the current referer {@link URL}
+ * @param postParams
+ * the POST parameters
+ * @param getParams
+ * the GET parameters (priority over POST)
+ * @param oauth
+ * OAuth authorization (aka, "bearer XXXXXXX")
+ *
+ * @return the {@link InputStream} of the opened page
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ public InputStream open(URL url, URL currentReferer,
+ Map
+ /**
+ * Trace a message: error messages are printed on the standard error
+ * stream, information messages on the standard output stream.
+ * <p>
+ * You can override it if you don't want the default sysout/syserr.
+ *
+ * @param message
+ * the message
+ * @param error
+ * TRUE for error messages, FALSE for information messages
+ */
+ protected void trace(String message, boolean error) {
+     // Pick the target stream first, then print once.
+     (error ? System.err : System.out).println(message);
+ }
+
+ /**
+ * Open the given {@link URL} and update the cookies.
+ *
+ * @param url
+ * the {@link URL} to open
+ * @param originalUrl
+ * the original {@link URL} before any redirection occurs
+ * @param currentReferer
+ * the current referer {@link URL}
+ * @param postParams
+ * the POST parameters
+ * @param getParams
+ * the GET parameters (priority over POST)
+ * @param oauth
+ * OAuth authorisation (aka, "bearer XXXXXXX")
+ * @return the {@link InputStream} of the opened page
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ private InputStream open(URL url, final URL originalUrl,
+ URL currentReferer, Map