1 package be
.nikiroo
.utils
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.OutputStreamWriter
;
6 import java
.net
.CookieHandler
;
7 import java
.net
.CookieManager
;
8 import java
.net
.CookiePolicy
;
9 import java
.net
.CookieStore
;
10 import java
.net
.HttpCookie
;
11 import java
.net
.HttpURLConnection
;
13 import java
.net
.URLConnection
;
14 import java
.net
.URLEncoder
;
16 import java
.util
.zip
.GZIPInputStream
;
19 * This class will help you download content from Internet Sites ({@link URL}
22 * It allows you to control some options often required on web sites that do not
23 * want to simply serve HTML, but actively make your life difficult with stupid
28 public class Downloader
{
30 private CookieManager cookies
;
31 private TraceHandler tracer
= new TraceHandler();
35 * Create a new {@link Downloader}.
38 * the User-Agent to use to download the resources -- note that
39 * some websites require one, some actively blacklist real UAs
40 * like the one from wget, some whitelist a couple of browsers
43 public Downloader(String UA
) {
48 * Create a new {@link Downloader}.
51 * the User-Agent to use to download the resources -- note that
52 * some websites require one, some actively blacklist real UAs
53 * like the one from wget, some whitelist a couple of browsers
56 * the {@link Cache} to use for all access (can be NULL)
58 public Downloader(String UA
, Cache cache
) {
61 cookies
= new CookieManager();
62 cookies
.setCookiePolicy(CookiePolicy
.ACCEPT_ALL
);
63 CookieHandler
.setDefault(cookies
);
69 * The traces handler for this {@link Downloader}.
71 * @return the traces handler
73 public TraceHandler
getTraceHandler() {
78 * The traces handler for this {@link Downloader}.
81 * the new traces handler
83 public void setTraceHandler(TraceHandler tracer
) {
85 tracer
= new TraceHandler(false, false, false);
92 * Clear all the cookies currently in the jar.
94 * As long as you don't, the cookies are kept.
96 public void clearCookies() {
97 cookies
.getCookieStore().removeAll();
101 * Open the given {@link URL} and update the cookies.
104 * the {@link URL} to open
105 * @return the {@link InputStream} of the opened page
107 * @throws IOException
108 * in case of I/O error
110 public InputStream
open(URL url
) throws IOException
{
111 return open(url
, false);
115 * Open the given {@link URL} and update the cookies.
118 * the {@link URL} to open
120 * stable a stable file (that doesn't change too often) --
121 * parameter used to check if the file is too old to keep or not
122 * in the cache (default is false)
124 * @return the {@link InputStream} of the opened page
126 * @throws IOException
127 * in case of I/O error
129 public InputStream
open(URL url
, boolean stable
) throws IOException
{
130 return open(url
, url
, url
, null, null, null, null, stable
);
134 * Open the given {@link URL} and update the cookies.
137 * the {@link URL} to open
138 * @param currentReferer
139 * the current referer, for websites that needs this info
140 * @param cookiesValues
143 * the POST parameters
145 * the GET parameters (priority over POST)
147 * OAuth authorization (aka, "bearer XXXXXXX")
149 * @return the {@link InputStream} of the opened page
151 * @throws IOException
152 * in case of I/O error
154 public InputStream
open(URL url
, URL currentReferer
,
155 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
156 Map
<String
, String
> getParams
, String oauth
) throws IOException
{
157 return open(url
, currentReferer
, cookiesValues
, postParams
, getParams
,
162 * Open the given {@link URL} and update the cookies.
165 * the {@link URL} to open
166 * @param currentReferer
167 * the current referer, for websites that needs this info
168 * @param cookiesValues
171 * the POST parameters
173 * the GET parameters (priority over POST)
175 * OAuth authorization (aka, "bearer XXXXXXX")
177 * stable a stable file (that doesn't change too often) --
178 * parameter used to check if the file is too old to keep or not
179 * in the cache (default is false)
181 * @return the {@link InputStream} of the opened page
183 * @throws IOException
184 * in case of I/O error
186 public InputStream
open(URL url
, URL currentReferer
,
187 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
188 Map
<String
, String
> getParams
, String oauth
, boolean stable
)
190 return open(url
, url
, currentReferer
, cookiesValues
, postParams
,
191 getParams
, oauth
, stable
);
195 * Open the given {@link URL} and update the cookies.
198 * the {@link URL} to open
200 * the original {@link URL} before any redirection occurs
202 * the POST parameters
204 * the GET parameters (priority over POST)
206 * OAuth authorisation (aka, "bearer XXXXXXX")
208 * a stable file (that doesn't change too often) -- parameter
209 * used to check if the file is too old to keep or not in the
212 * @return the {@link InputStream} of the opened page
214 * @throws IOException
215 * in case of I/O error
217 private InputStream
open(URL url
, final URL originalUrl
,
218 URL currentReferer
, Map
<String
, String
> cookiesValues
,
219 Map
<String
, String
> postParams
, Map
<String
, String
> getParams
,
220 String oauth
, boolean stable
) throws IOException
{
222 tracer
.trace("Request: " + url
);
225 InputStream in
= cache
.load(url
, false, stable
);
227 tracer
.trace("Take from cache: " + url
);
232 tracer
.trace("Download: " + url
);
234 URLConnection conn
= openConnectionWithCookies(url
, currentReferer
,
237 // Priority: GET over POST
238 Map
<String
, String
> params
= getParams
;
239 if (getParams
== null) {
243 if ((params
!= null || oauth
!= null)
244 && conn
instanceof HttpURLConnection
) {
245 StringBuilder requestData
= null;
246 if (params
!= null) {
247 requestData
= new StringBuilder();
248 for (Map
.Entry
<String
, String
> param
: params
.entrySet()) {
249 if (requestData
.length() != 0)
250 requestData
.append('&');
251 requestData
.append(URLEncoder
.encode(param
.getKey(),
253 requestData
.append('=');
254 requestData
.append(URLEncoder
.encode(
255 String
.valueOf(param
.getValue()), "UTF-8"));
258 conn
.setDoOutput(true);
260 if (getParams
== null && postParams
!= null) {
261 ((HttpURLConnection
) conn
).setRequestMethod("POST");
264 conn
.setRequestProperty("Content-Type",
265 "application/x-www-form-urlencoded");
266 conn
.setRequestProperty("charset", "utf-8");
270 conn
.setRequestProperty("Authorization", oauth
);
273 if (requestData
!= null) {
274 OutputStreamWriter writer
= null;
276 writer
= new OutputStreamWriter(conn
.getOutputStream());
277 writer
.write(requestData
.toString());
280 if (writer
!= null) {
290 if (conn
instanceof HttpURLConnection
) {
293 // Can fail in some circumstances
294 repCode
= ((HttpURLConnection
) conn
).getResponseCode();
295 } catch (IOException e
) {
298 if (repCode
/ 100 == 3) {
299 String newUrl
= conn
.getHeaderField("Location");
300 return open(new URL(newUrl
), originalUrl
, currentReferer
,
301 cookiesValues
, postParams
, getParams
, oauth
, stable
);
305 InputStream in
= conn
.getInputStream();
306 if ("gzip".equals(conn
.getContentEncoding())) {
307 in
= new GZIPInputStream(in
);
310 if (in
!= null && cache
!= null) {
311 tracer
.trace("Save to cache: " + url
);
314 } catch (IOException e
) {
315 tracer
.error(new IOException(
316 "Cannot save URL to cache, will ignore cache: " + url
,
325 * Open a connection on the given {@link URL}, and manage the cookies that
329 * the {@link URL} to open
331 * @return the connection
333 * @throws IOException
334 * in case of I/O error
336 private URLConnection
openConnectionWithCookies(URL url
,
337 URL currentReferer
, Map
<String
, String
> cookiesValues
)
339 URLConnection conn
= url
.openConnection();
341 conn
.setRequestProperty("User-Agent", UA
);
342 conn
.setRequestProperty("Cookie", generateCookies(cookiesValues
));
343 conn
.setRequestProperty("Accept-Encoding", "gzip");
344 if (currentReferer
!= null) {
345 conn
.setRequestProperty("Referer", currentReferer
.toString());
346 conn
.setRequestProperty("Host", currentReferer
.getHost());
353 * Generate the cookie {@link String} from the local {@link CookieStore} so
354 * it is ready to be passed.
358 private String
generateCookies(Map
<String
, String
> cookiesValues
) {
359 StringBuilder builder
= new StringBuilder();
360 for (HttpCookie cookie
: cookies
.getCookieStore().getCookies()) {
361 if (builder
.length() > 0) {
365 // TODO: check if format is ok
366 builder
.append(cookie
.toString());
369 if (cookiesValues
!= null) {
370 for (Map
.Entry
<String
, String
> set
: cookiesValues
.entrySet()) {
371 if (builder
.length() > 0) {
374 builder
.append(set
.getKey());
376 builder
.append(set
.getValue());
380 return builder
.toString();