1 package be
.nikiroo
.utils
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.OutputStreamWriter
;
6 import java
.net
.CookieHandler
;
7 import java
.net
.CookieManager
;
8 import java
.net
.CookiePolicy
;
9 import java
.net
.CookieStore
;
10 import java
.net
.HttpCookie
;
11 import java
.net
.HttpURLConnection
;
13 import java
.net
.URLConnection
;
14 import java
.net
.URLEncoder
;
16 import java
.util
.zip
.GZIPInputStream
;
19 * This class will help you download content from Internet Sites ({@link URL}
22 * It allows you to control some options often required on web sites that do not
23 * want to simply serve HTML, but actively makes your life difficult with stupid
28 public class Downloader
{
30 private CookieManager cookies
;
31 private TraceHandler tracer
= new TraceHandler();
35 * Create a new {@link Downloader}.
38 * the User-Agent to use to download the resources -- note that
39 * some websites require one, some actively blacklist real UAs
40 * like the one from wget, some whitelist a couple of browsers
43 public Downloader(String UA
) {
48 * Create a new {@link Downloader}.
51 * the User-Agent to use to download the resources -- note that
52 * some websites require one, some actively blacklist real UAs
53 * like the one from wget, some whitelist a couple of browsers
56 * the {@link Cache} to use for all access (can be NULL)
// Two-argument constructor (User-Agent + optional Cache).
// NOTE(review): only part of the body is visible in this listing; the
// statements storing UA and cache are not shown here -- confirm against the
// full file.
58 public Downloader(String UA
, Cache cache
) {
// Start with a fresh, empty cookie jar for this instance.
61 cookies
= new CookieManager();
// Accept every cookie, including third-party ones.
62 cookies
.setCookiePolicy(CookiePolicy
.ACCEPT_ALL
);
// Installs the jar as the default CookieHandler; this default is system-wide,
// so it also affects URL connections opened outside of this class.
63 CookieHandler
.setDefault(cookies
);
69 * The traces handler for this {@link Cache}.
71 * @return the traces handler
73 public TraceHandler
getTraceHandler() {
78 * The traces handler for this {@link Cache}.
81 * the new traces handler
// Setter for the traces handler.
// The visible statement replaces the *parameter* with a TraceHandler built
// with three 'false' flags.
// NOTE(review): presumably this is the fallback used when the caller passes
// null, followed by an assignment to the field -- neither the guard nor the
// assignment is visible here; confirm against the full file.
83 public void setTraceHandler(TraceHandler tracer
) {
85 tracer
= new TraceHandler(false, false, false);
92 * The {@link Cache} to use for all access (can be NULL).
96 public Cache
getCache() {
101 * The {@link Cache} to use for all access (can be NULL).
106 public void setCache(Cache cache
) {
111 * Clear all the cookies currently in the jar.
113 * As long as you don't, the cookies are kept.
115 public void clearCookies() {
116 cookies
.getCookieStore().removeAll();
120 * Open the given {@link URL} and update the cookies.
123 * the {@link URL} to open
124 * @return the {@link InputStream} of the opened page
126 * @throws IOException
127 * in case of I/O error
129 public InputStream
open(URL url
) throws IOException
{
130 return open(url
, false);
134 * Open the given {@link URL} and update the cookies.
137 * the {@link URL} to open
139 * stable a stable file (that doesn't change too often) --
140 * parameter used to check if the file is too old to keep or not
141 * in the cache (default is false)
143 * @return the {@link InputStream} of the opened page
145 * @throws IOException
146 * in case of I/O error
148 public InputStream
open(URL url
, boolean stable
) throws IOException
{
149 return open(url
, url
, url
, null, null, null, null, stable
);
153 * Open the given {@link URL} and update the cookies.
156 * the {@link URL} to open
157 * @param currentReferer
158 * the current referer, for websites that needs this info
159 * @param cookiesValues
162 * the POST parameters
164 * the GET parameters (priority over POST)
166 * OAuth authorization (aka, "bearer XXXXXXX")
168 * @return the {@link InputStream} of the opened page
170 * @throws IOException
171 * in case of I/O error
// Convenience overload without the 'stable' flag: it forwards the URL,
// referer, extra cookies, POST parameters and GET parameters to another
// open(...) overload.
// NOTE(review): the end of the forwarded argument list is not visible in this
// listing; presumably it passes 'oauth' and stable=false -- confirm.
173 public InputStream
open(URL url
, URL currentReferer
,
174 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
175 Map
<String
, String
> getParams
, String oauth
) throws IOException
{
176 return open(url
, currentReferer
, cookiesValues
, postParams
, getParams
,
181 * Open the given {@link URL} and update the cookies.
184 * the {@link URL} to open
185 * @param currentReferer
186 * the current referer, for websites that needs this info
187 * @param cookiesValues
190 * the POST parameters
192 * the GET parameters (priority over POST)
194 * OAuth authorization (aka, "bearer XXXXXXX")
196 * stable a stable file (that doesn't change too often) --
197 * parameter used to check if the file is too old to keep or not
198 * in the cache (default is false)
200 * @return the {@link InputStream} of the opened page
202 * @throws IOException
203 * in case of I/O error
205 public InputStream
open(URL url
, URL currentReferer
,
206 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
207 Map
<String
, String
> getParams
, String oauth
, boolean stable
)
209 return open(url
, url
, currentReferer
, cookiesValues
, postParams
,
210 getParams
, oauth
, stable
);
214 * Open the given {@link URL} and update the cookies.
217 * the {@link URL} to open
219 * the original {@link URL} before any redirection occurs
221 * the POST parameters
223 * the GET parameters (priority over POST)
225 * OAuth authorisation (aka, "bearer XXXXXXX")
227 * a stable file (that doesn't change too often) -- parameter
228 * used to check if the file is too old to keep or not in the
231 * @return the {@link InputStream} of the opened page
233 * @throws IOException
234 * in case of I/O error
// Internal worker behind the public open(...) overloads: traces the request,
// consults the cache, otherwise opens a connection with cookies, optionally
// sends form data / an Authorization header, follows 3xx redirections by
// calling itself, transparently un-gzips the answer and traces a cache save.
// 'originalUrl' is passed unchanged through redirections.
// NOTE(review): many statements (conditions, returns, closing braces) are not
// visible in this listing; comments below describe only what is shown.
236 private InputStream
open(URL url
, final URL originalUrl
,
237 URL currentReferer
, Map
<String
, String
> cookiesValues
,
238 Map
<String
, String
> postParams
, Map
<String
, String
> getParams
,
239 String oauth
, boolean stable
) throws IOException
{
241 tracer
.trace("Request: " + url
);
// Cache lookup for this URL, forwarding the 'stable' flag.
// NOTE(review): the enclosing null checks and the early return of the cached
// stream are not visible here -- confirm.
244 InputStream in
= cache
.load(url
, false, stable
);
246 tracer
.trace("Take from cache: " + url
);
251 tracer
.trace("Download: " + url
);
// Open the connection with the request headers and cookies already set.
253 URLConnection conn
= openConnectionWithCookies(url
, currentReferer
,
256 // Priority: GET over POST
257 Map
<String
, String
> params
= getParams
;
// NOTE(review): presumably falls back to postParams inside this branch (body
// not visible).
258 if (getParams
== null) {
// Request data / Authorization are only sent over HTTP(S) connections, and
// only when there is something to send.
262 if ((params
!= null || oauth
!= null)
263 && conn
instanceof HttpURLConnection
) {
264 StringBuilder requestData
= null;
265 if (params
!= null) {
// Build "key=value" pairs joined by '&'; values are URL-encoded as UTF-8.
266 requestData
= new StringBuilder();
267 for (Map
.Entry
<String
, String
> param
: params
.entrySet()) {
268 if (requestData
.length() != 0)
269 requestData
.append('&');
270 requestData
.append(URLEncoder
.encode(param
.getKey(),
272 requestData
.append('=');
// String.valueOf turns a null value into the text "null" before encoding.
273 requestData
.append(URLEncoder
.encode(
274 String
.valueOf(param
.getValue()), "UTF-8"));
// Required before anything can be written to the connection.
277 conn
.setDoOutput(true);
// The method is forced to POST only when no GET parameters were given and
// POST parameters exist.
279 if (getParams
== null && postParams
!= null) {
280 ((HttpURLConnection
) conn
).setRequestMethod("POST");
283 conn
.setRequestProperty("Content-Type",
284 "application/x-www-form-urlencoded");
285 conn
.setRequestProperty("charset", "utf-8");
// The OAuth value is sent verbatim as the Authorization header.
// NOTE(review): the guard around this call is not visible here.
289 conn
.setRequestProperty("Authorization", oauth
);
// Send the encoded data as the request body.
// NOTE(review): the writer uses the platform default charset; the data written
// is URL-encoded text.
292 if (requestData
!= null) {
293 OutputStreamWriter writer
= null;
295 writer
= new OutputStreamWriter(conn
.getOutputStream());
296 writer
.write(requestData
.toString());
299 if (writer
!= null) {
// Redirection handling is only attempted for HTTP(S) connections.
309 if (conn
instanceof HttpURLConnection
) {
312 // Can fail in some circumstances
313 repCode
= ((HttpURLConnection
) conn
).getResponseCode();
314 } catch (IOException e
) {
// Any 3xx answer: re-run the whole request on the "Location" target, keeping
// the same original URL, referer, cookies, parameters and OAuth value.
// NOTE(review): new URL(newUrl) assumes an absolute, non-null Location header
// -- confirm.
317 if (repCode
/ 100 == 3) {
318 String newUrl
= conn
.getHeaderField("Location");
319 return open(new URL(newUrl
), originalUrl
, currentReferer
,
320 cookiesValues
, postParams
, getParams
, oauth
, stable
);
// Read the answer, transparently decompressing gzip-encoded content.
324 InputStream in
= conn
.getInputStream();
325 if ("gzip".equals(conn
.getContentEncoding())) {
326 in
= new GZIPInputStream(in
);
// Cache save is only attempted when there is both a stream and a cache; a
// failure is reported through the tracer as an IOException.
329 if (in
!= null && cache
!= null) {
330 tracer
.trace("Save to cache: " + url
);
333 } catch (IOException e
) {
334 tracer
.error(new IOException(
335 "Cannot save URL to cache, will ignore cache: " + url
,
344 * Open a connection on the given {@link URL}, and manage the cookies that
348 * the {@link URL} to open
350 * @return the connection
352 * @throws IOException
353 * in case of I/O error
// Opens a connection on 'url' and pre-populates its request headers:
// User-Agent, Cookie (stored cookies plus the extra 'cookiesValues'),
// Accept-Encoding, and -- when a referer is given -- Referer and Host.
// NOTE(review): the throws clause, closing braces and the return statement are
// not visible in this listing.
355 private URLConnection
openConnectionWithCookies(URL url
,
356 URL currentReferer
, Map
<String
, String
> cookiesValues
)
358 URLConnection conn
= url
.openConnection();
360 conn
.setRequestProperty("User-Agent", UA
);
// The Cookie header is built by generateCookies(...).
361 conn
.setRequestProperty("Cookie", generateCookies(cookiesValues
));
// Advertise gzip support; the caller un-gzips the answer when needed.
362 conn
.setRequestProperty("Accept-Encoding", "gzip");
363 if (currentReferer
!= null) {
364 conn
.setRequestProperty("Referer", currentReferer
.toString());
// NOTE(review): Host is taken from the referer, not from 'url' -- confirm this
// is intended when both are on different hosts.
365 conn
.setRequestProperty("Host", currentReferer
.getHost());
372 * Generate the cookie {@link String} from the local {@link CookieStore} so
373 * it is ready to be passed.
// Builds the value of the "Cookie" request header: first every cookie held in
// the local cookie store (via HttpCookie.toString()), then the key and value
// of each entry of 'cookiesValues' when that map is not null.
// NOTE(review): the separators appended between cookies and between key and
// value are not visible in this listing -- confirm against the full file.
377 private String
generateCookies(Map
<String
, String
> cookiesValues
) {
378 StringBuilder builder
= new StringBuilder();
379 for (HttpCookie cookie
: cookies
.getCookieStore().getCookies()) {
// A non-empty builder means a previous cookie was already written.
380 if (builder
.length() > 0) {
384 // TODO: check if format is ok
385 builder
.append(cookie
.toString());
// Caller-supplied cookies come after the stored ones.
388 if (cookiesValues
!= null) {
389 for (Map
.Entry
<String
, String
> set
: cookiesValues
.entrySet()) {
390 if (builder
.length() > 0) {
393 builder
.append(set
.getKey());
395 builder
.append(set
.getValue());
399 return builder
.toString();