1 package be
.nikiroo
.utils
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.OutputStreamWriter
;
6 import java
.net
.CookieHandler
;
7 import java
.net
.CookieManager
;
8 import java
.net
.CookiePolicy
;
9 import java
.net
.CookieStore
;
10 import java
.net
.HttpCookie
;
11 import java
.net
.HttpURLConnection
;
13 import java
.net
.URLConnection
;
14 import java
.net
.URLEncoder
;
16 import java
.util
.zip
.GZIPInputStream
;
/**
 * This class will help you download content from Internet Sites ({@link URL}
 * based).
 * <p>
 * It allows you to control some options often required on web sites that do not
 * want to simply serve HTML, but actively make your life difficult with stupid
 * checks.
 */
28 public class Downloader
{
// Cookie jar of this downloader; it is created in the two-argument
// constructor and read back by clearCookies() and generateCookies().
// NOTE(review): the `UA` and `cache` fields used further down are not
// declared in the visible part of this listing.
30 private CookieManager cookies
;
// Receiver of the trace(...) and error(...) calls made while downloading.
31 private TraceHandler tracer
= new TraceHandler();
/**
 * Create a new {@link Downloader}.
 *
 * @param UA
 *            the User-Agent to use to download the resources -- note that
 *            some websites require one, some actively blacklist real UAs
 *            like the one from wget, some whitelist a couple of browsers
 */
// Constructor taking only the User-Agent.
// NOTE(review): the body is not visible in this listing -- presumably it
// delegates to Downloader(String, Cache) without a cache; confirm.
43 public Downloader(String UA
) {
/**
 * Create a new {@link Downloader}.
 *
 * @param UA
 *            the User-Agent to use to download the resources -- note that
 *            some websites require one, some actively blacklist real UAs
 *            like the one from wget, some whitelist a couple of browsers
 * @param cache
 *            the {@link Cache} to use for all access (can be NULL)
 */
// Constructor taking the User-Agent and an optional cache; it sets up the
// cookie jar.
// NOTE(review): the statements storing `UA` and `cache` are not visible in
// this listing.
58 public Downloader(String UA
, Cache cache
) {
// Fresh cookie jar that accepts every cookie it is offered.
61 cookies
= new CookieManager();
62 cookies
.setCookiePolicy(CookiePolicy
.ACCEPT_ALL
);
// NOTE(review): setDefault() installs the jar as the system-wide cookie
// handler, so creating a second Downloader replaces the handler used by
// every other connection in the JVM -- confirm this is intended.
63 CookieHandler
.setDefault(cookies
);
/**
 * The traces handler for this {@link Downloader}.
 *
 * @return the traces handler
 */
// Accessor for the traces handler.
// NOTE(review): the body is not visible in this listing -- presumably it
// returns the `tracer` field; confirm.
73 public TraceHandler
getTraceHandler() {
/**
 * The traces handler for this {@link Downloader}.
 *
 * @param tracer
 *            the new traces handler
 */
// Mutator for the traces handler.
83 public void setTraceHandler(TraceHandler tracer
) {
// Replaces the parameter by a TraceHandler built with (false, false, false).
// NOTE(review): the surrounding lines are missing from this listing --
// presumably this only happens when the given handler is null, and the
// result is then stored in the field; confirm, and confirm what the three
// flags mean.
85 tracer
= new TraceHandler(false, false, false);
/**
 * The {@link Cache} to use for all access (can be NULL).
 *
 * @return the {@link Cache} (can be NULL)
 */
// Accessor for the cache.
// NOTE(review): the body is not visible in this listing -- presumably it
// returns the `cache` field that the 8-argument open(...) reads; confirm.
96 public Cache
getCache() {
/**
 * The {@link Cache} to use for all access (can be NULL).
 *
 * @param cache
 *            the new {@link Cache} (can be NULL)
 */
// Mutator for the cache.
// NOTE(review): the body is not visible in this listing -- presumably it
// stores the parameter in the `cache` field; confirm.
106 public void setCache(Cache cache
) {
111 * Clear all the cookies currently in the jar.
113 * As long as you don't, the cookies are kept.
115 public void clearCookies() {
116 cookies
.getCookieStore().removeAll();
120 * Open the given {@link URL} and update the cookies.
123 * the {@link URL} to open
124 * @return the {@link InputStream} of the opened page
126 * @throws IOException
127 * in case of I/O error
129 public InputStream
open(URL url
) throws IOException
{
130 return open(url
, false);
134 * Open the given {@link URL} and update the cookies.
137 * the {@link URL} to open
139 * stable a stable file (that doesn't change too often) --
140 * parameter used to check if the file is too old to keep or not
141 * in the cache (default is false)
143 * @return the {@link InputStream} of the opened page
145 * @throws IOException
146 * in case of I/O error
148 public InputStream
open(URL url
, boolean stable
) throws IOException
{
149 return open(url
, url
, url
, null, null, null, null, stable
);
/**
 * Open the given {@link URL} and update the cookies.
 *
 * @param url
 *            the {@link URL} to open
 * @param currentReferer
 *            the current referer, for websites that need this info
 * @param cookiesValues
 *            the cookies to send with the request
 * @param postParams
 *            the POST parameters
 * @param getParams
 *            the GET parameters (priority over POST)
 * @param oauth
 *            OAuth authorization (aka, "bearer XXXXXXX")
 *
 * @return the {@link InputStream} of the opened page
 *
 * @throws IOException
 *             in case of I/O error
 */
// Overload without the `stable` flag; it forwards its arguments to another
// open(...) overload.
// NOTE(review): the delegating call is cut after `getParams,` in this
// listing; the remaining arguments (presumably `oauth` and a false `stable`
// flag) are not visible -- confirm.
173 public InputStream
open(URL url
, URL currentReferer
,
174 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
175 Map
<String
, String
> getParams
, String oauth
) throws IOException
{
176 return open(url
, currentReferer
, cookiesValues
, postParams
, getParams
,
181 * Open the given {@link URL} and update the cookies.
184 * the {@link URL} to open
185 * @param currentReferer
186 * the current referer, for websites that needs this info
187 * @param cookiesValues
190 * the POST parameters
192 * the GET parameters (priority over POST)
194 * OAuth authorization (aka, "bearer XXXXXXX")
196 * stable a stable file (that doesn't change too often) --
197 * parameter used to check if the file is too old to keep or not
198 * in the cache (default is false)
200 * @return the {@link InputStream} of the opened page
202 * @throws IOException
203 * in case of I/O error
205 public InputStream
open(URL url
, URL currentReferer
,
206 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
207 Map
<String
, String
> getParams
, String oauth
, boolean stable
)
209 return open(url
, url
, currentReferer
, cookiesValues
, postParams
,
210 getParams
, oauth
, stable
);
/**
 * Open the given {@link URL} and update the cookies.
 *
 * @param url
 *            the {@link URL} to open
 * @param originalUrl
 *            the original {@link URL} before any redirection occurs, which
 *            is also used for the cache ID if needed (so we can retrieve
 *            the content with this URL if needed)
 * @param currentReferer
 *            the current referer, for websites that need this info
 * @param cookiesValues
 *            the cookies to send with the request
 * @param postParams
 *            the POST parameters
 * @param getParams
 *            the GET parameters (priority over POST)
 * @param oauth
 *            OAuth authorization (aka, "bearer XXXXXXX")
 * @param stable
 *            a stable file (that doesn't change too often) -- parameter
 *            used to check if the file is too old to keep or not in the
 *            cache
 *
 * @return the {@link InputStream} of the opened page
 *
 * @throws IOException
 *             in case of I/O error
 */
// Core download routine. Visible steps: trace the request, ask the cache for
// originalUrl, otherwise open a connection through
// openConnectionWithCookies(), send the URL-encoded parameters, answer a 3xx
// response code by calling itself with the "Location" header, wrap gzip
// content in a GZIPInputStream, and save the stream to the cache.
// NOTE(review): many source lines are missing from this listing (the
// embedded line numbers skip), so the throws clause, several braces,
// try/finally statements and assignments are not visible here.
242 public InputStream
open(URL url
, final URL originalUrl
, URL currentReferer
,
243 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
244 Map
<String
, String
> getParams
, String oauth
, boolean stable
)
247 tracer
.trace("Request: " + url
);
// Cache lookup is keyed on the original URL, not on the current one.
// NOTE(review): the null checks on `cache` and on the loaded stream are
// presumably on lines missing here -- confirm.
250 InputStream in
= cache
.load(originalUrl
, false, stable
);
252 tracer
.trace("Use the cache: " + url
);
253 tracer
.trace("Original URL : " + originalUrl
);
258 tracer
.trace("Download: " + url
);
260 URLConnection conn
= openConnectionWithCookies(url
, currentReferer
,
263 // Priority: GET over POST
264 Map
<String
, String
> params
= getParams
;
// NOTE(review): the body of this `if` is not visible -- presumably it falls
// back to postParams; confirm.
265 if (getParams
== null) {
// Request data and headers are only prepared for HTTP connections.
269 if ((params
!= null || oauth
!= null)
270 && conn
instanceof HttpURLConnection
) {
271 StringBuilder requestData
= null;
272 if (params
!= null) {
// Build a key=value&key=value body, each part URL-encoded.
273 requestData
= new StringBuilder();
274 for (Map
.Entry
<String
, String
> param
: params
.entrySet()) {
275 if (requestData
.length() != 0)
276 requestData
.append('&');
277 requestData
.append(URLEncoder
.encode(param
.getKey(),
279 requestData
.append('=');
280 requestData
.append(URLEncoder
.encode(
281 String
.valueOf(param
.getValue()), "UTF-8"));
284 conn
.setDoOutput(true);
// POST is only forced when the data really comes from postParams.
286 if (getParams
== null && postParams
!= null) {
287 ((HttpURLConnection
) conn
).setRequestMethod("POST");
290 conn
.setRequestProperty("Content-Type",
291 "application/x-www-form-urlencoded");
// NOTE(review): this sends a request header literally named "charset";
// a charset is normally given as a parameter of Content-Type -- confirm
// the target servers rely on it.
292 conn
.setRequestProperty("charset", "utf-8");
296 conn
.setRequestProperty("Authorization", oauth
);
299 if (requestData
!= null) {
300 OutputStreamWriter writer
= null;
// NOTE(review): the writer is created without an explicit charset, so the
// platform default is used, while the data was encoded with "UTF-8" above.
302 writer
= new OutputStreamWriter(conn
.getOutputStream());
303 writer
.write(requestData
.toString());
306 if (writer
!= null) {
316 if (conn
instanceof HttpURLConnection
) {
319 // Can fail in some circumstances
320 repCode
= ((HttpURLConnection
) conn
).getResponseCode();
// NOTE(review): the catch body appears to be empty in the original, so a
// failure here is ignored and repCode keeps its earlier value (its
// declaration is not visible in this listing).
321 } catch (IOException e
) {
// Any 3xx response code is handled as a redirection.
324 if (repCode
/ 100 == 3) {
// NOTE(review): getHeaderField can return null and "Location" may be a
// relative reference; new URL(newUrl) would then throw. No hop limit on
// this recursion is visible either -- TODO confirm and guard.
325 String newUrl
= conn
.getHeaderField("Location");
326 return open(new URL(newUrl
), originalUrl
, currentReferer
,
327 cookiesValues
, postParams
, getParams
, oauth
, stable
);
331 InputStream in
= conn
.getInputStream();
// "Accept-Encoding: gzip" is requested in openConnectionWithCookies(), so
// a gzip answer has to be decompressed here.
332 if ("gzip".equals(conn
.getContentEncoding())) {
333 in
= new GZIPInputStream(in
);
336 if (in
!= null && cache
!= null) {
337 tracer
.trace("Save to cache: " + originalUrl
);
// The stream is saved under the original URL, then read back from the cache.
340 cache
.save(in
, originalUrl
);
344 in
= cache
.load(originalUrl
, true, false);
// A cache failure is reported through the tracer.
// NOTE(review): the rest of this error call and the end of the method are
// not visible in this listing.
345 } catch (IOException e
) {
346 tracer
.error(new IOException(
347 "Cannot save URL to cache, will ignore cache: " + url
,
356 * Open a connection on the given {@link URL}, and manage the cookies that
360 * the {@link URL} to open
362 * @return the connection
364 * @throws IOException
365 * in case of I/O error
367 private URLConnection
openConnectionWithCookies(URL url
,
368 URL currentReferer
, Map
<String
, String
> cookiesValues
)
370 URLConnection conn
= url
.openConnection();
372 conn
.setRequestProperty("User-Agent", UA
);
373 conn
.setRequestProperty("Cookie", generateCookies(cookiesValues
));
374 conn
.setRequestProperty("Accept-Encoding", "gzip");
375 if (currentReferer
!= null) {
376 conn
.setRequestProperty("Referer", currentReferer
.toString());
377 conn
.setRequestProperty("Host", currentReferer
.getHost());
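/**
 * Generate the cookie {@link String} from the local {@link CookieStore} so
 * it is ready to be passed.
 *
 * @param cookiesValues
 *            extra cookies to add after those of the store (can be NULL)
 *
 * @return the generated cookie {@link String}
 */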
// Builds the value sent as the "Cookie" request header: first every cookie
// of the local store, then the extra key/value pairs given by the caller.
// NOTE(review): the separators (between cookies, and between key and value)
// are appended on lines that are missing from this listing.
389 private String
generateCookies(Map
<String
, String
> cookiesValues
) {
390 StringBuilder builder
= new StringBuilder();
391 for (HttpCookie cookie
: cookies
.getCookieStore().getCookies()) {
// Only true once something was already written, i.e. from the second
// entry on.
392 if (builder
.length() > 0) {
// NOTE(review): HttpCookie.toString() output depends on the cookie version
// and may contain more than name=value -- confirm servers accept it.
396 builder
.append(cookie
.toString());
// The caller-supplied cookies are optional.
399 if (cookiesValues
!= null) {
400 for (Map
.Entry
<String
, String
> set
: cookiesValues
.entrySet()) {
401 if (builder
.length() > 0) {
// Key and value are appended as given, without any escaping.
404 builder
.append(set
.getKey());
406 builder
.append(set
.getValue());
410 return builder
.toString();