package be.nikiroo.utils; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.net.CookieHandler; import java.net.CookieManager; import java.net.CookiePolicy; import java.net.CookieStore; import java.net.HttpCookie; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.util.Map; import java.util.zip.GZIPInputStream; /** * This class will help you download content from Internet Sites ({@link URL} * based). *
* It allows you to control some options often required on web sites that do
 * not want to simply serve HTML, but actively make your life difficult with
 * stupid checks.
 *
 * @author niki
 */
public class Downloader {
	/** The User-Agent received by the constructor. */
	private String UA;
	/** The cookie jar; the constructor installs it as the default handler. */
	private CookieManager cookies;

	/**
	 * Create a new {@link Downloader}.
	 * <p>
	 * A fresh {@link CookieManager} accepting all cookies is created and
	 * registered through {@link CookieHandler#setDefault(CookieHandler)},
	 * which replaces whatever default handler was set before.
	 *
	 * @param UA
	 *            the User-Agent to use to download the resources -- note
	 *            that some websites require one, some actively blacklist
	 *            real UAs like the one from wget, some whitelist a couple of
	 *            browsers only (!)
	 */
	public Downloader(String UA) {
		// Fully configure the cookie jar before publishing it anywhere.
		final CookieManager manager = new CookieManager();
		manager.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
		CookieHandler.setDefault(manager);

		this.cookies = manager;
		this.UA = UA;
	}

	/**
	 * Clear all the cookies currently in the jar.
	 * <p>
* As long as you don't, the cookies are kept.
*/
public void clearCookies() {
	// Empty the store that backs this downloader's cookie manager.
	final CookieStore jar = cookies.getCookieStore();
	jar.removeAll();
}
/**
 * Open the given {@link URL} and update the cookies.
 * <p>
 * This is a shortcut for the full variant: the same {@link URL} is passed
 * for its first three {@link URL} arguments and every remaining argument is
 * <tt>null</tt>.
 *
 * @param url
 *            the {@link URL} to open
 *
 * @return the {@link InputStream} of the opened page
 *
 * @throws IOException
 *             in case of I/O error
 */
public InputStream open(URL url) throws IOException {
	// No redirection happened yet, so the original URL and the referer
	// handed to the full variant are both the requested URL itself.
	final URL originalUrl = url;
	final URL referer = url;

	return open(url, originalUrl, referer, null, null, null, null);
}
/**
* Open the given {@link URL} and update the cookies.
*
* @param url
* the {@link URL} to open
* @param postParams
* the POST parameters
* @param getParams
* the GET parameters (priority over POST)
* @param oauth
* OAuth authorization (e.g., "bearer XXXXXXX")
*
* @return the {@link InputStream} of the opened page
*
* @throws IOException
* in case of I/O error
*/
public InputStream open(URL url, URL currentReferer,
Map
* You can override it if you don't want the default sysout/syserr.
*
* @param message
* the message
* @param error
* TRUE for error messages, FALSE for information messages
*/
protected void trace(String message, boolean error) {
if (error) {
System.err.println(message);
} else {
System.out.println(message);
}
}
/**
* Open the given {@link URL} and update the cookies.
*
* @param url
* the {@link URL} to open
* @param originalUrl
* the original {@link URL} before any redirection occurs
* @param postParams
* the POST parameters
* @param getParams
* the GET parameters (priority over POST)
* @param oauth
* OAuth authorization (e.g., "bearer XXXXXXX")
* @return the {@link InputStream} of the opened page
*
* @throws IOException
* in case of I/O error
*/
private InputStream open(URL url, final URL originalUrl,
URL currentReferer, Map