1 package be
.nikiroo
.utils
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.OutputStreamWriter
;
6 import java
.net
.CookieHandler
;
7 import java
.net
.CookieManager
;
8 import java
.net
.CookiePolicy
;
9 import java
.net
.CookieStore
;
10 import java
.net
.HttpCookie
;
11 import java
.net
.HttpURLConnection
;
13 import java
.net
.URLConnection
;
14 import java
.net
.URLEncoder
;
16 import java
.util
.zip
.GZIPInputStream
;
19 * This class will help you download content from Internet Sites ({@link URL}
22 * It allows you to control some options often required on web sites that do not
23 * want to simply serve HTML, but actively makes your life difficult with stupid
28 public class Downloader
{
30 private CookieManager cookies
;
31 private TraceHandler tracer
= new TraceHandler();
34 * Create a new {@link Downloader}.
37 * the User-Agent to use to download the resources -- note that
38 * some websites require one, some actively blacklist real UAs
39 * like the one from wget, some whitelist a couple of browsers
42 public Downloader(String UA
) {
45 cookies
= new CookieManager();
46 cookies
.setCookiePolicy(CookiePolicy
.ACCEPT_ALL
);
47 CookieHandler
.setDefault(cookies
);
51 * The traces handler for this {@link Cache}.
53 * @return the traces handler
55 public TraceHandler
getTraceHandler() {
60 * The traces handler for this {@link Cache}.
63 * the new traces handler
65 public void setTraceHandler(TraceHandler tracer
) {
70 * Clear all the cookies currently in the jar.
72 * As long as you don't, the cookies are kept.
74 public void clearCookies() {
75 cookies
.getCookieStore().removeAll();
79 * Open the given {@link URL} and update the cookies.
82 * the {@link URL} to open
83 * @return the {@link InputStream} of the opened page
86 * in case of I/O error
88 public InputStream
open(URL url
) throws IOException
{
89 return open(url
, url
, url
, null, null, null, null);
93 * Open the given {@link URL} and update the cookies.
96 * the {@link URL} to open
97 * @param currentReferer
98 * the current referer, for websites that needs this info
99 * @param cookiesValues
102 * the POST parameters
104 * the GET parameters (priority over POST)
106 * OAuth authorization (aka, "bearer XXXXXXX")
108 * @return the {@link InputStream} of the opened page
110 * @throws IOException
111 * in case of I/O error
113 public InputStream
open(URL url
, URL currentReferer
,
114 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
115 Map
<String
, String
> getParams
, String oauth
) throws IOException
{
116 return open(url
, url
, currentReferer
, cookiesValues
, postParams
,
121 * Open the given {@link URL} and update the cookies.
124 * the {@link URL} to open
126 * the original {@link URL} before any redirection occurs
128 * the POST parameters
130 * the GET parameters (priority over POST)
132 * OAuth authorisation (aka, "bearer XXXXXXX")
133 * @return the {@link InputStream} of the opened page
135 * @throws IOException
136 * in case of I/O error
138 private InputStream
open(URL url
, final URL originalUrl
,
139 URL currentReferer
, Map
<String
, String
> cookiesValues
,
140 Map
<String
, String
> postParams
, Map
<String
, String
> getParams
,
141 String oauth
) throws IOException
{
143 tracer
.trace("Download: " + url
);
145 URLConnection conn
= openConnectionWithCookies(url
, currentReferer
,
148 // Priority: GET over POST
149 Map
<String
, String
> params
= getParams
;
150 if (getParams
== null) {
154 if ((params
!= null || oauth
!= null)
155 && conn
instanceof HttpURLConnection
) {
156 StringBuilder requestData
= null;
157 if (params
!= null) {
158 requestData
= new StringBuilder();
159 for (Map
.Entry
<String
, String
> param
: params
.entrySet()) {
160 if (requestData
.length() != 0)
161 requestData
.append('&');
162 requestData
.append(URLEncoder
.encode(param
.getKey(),
164 requestData
.append('=');
165 requestData
.append(URLEncoder
.encode(
166 String
.valueOf(param
.getValue()), "UTF-8"));
169 conn
.setDoOutput(true);
171 if (getParams
== null && postParams
!= null) {
172 ((HttpURLConnection
) conn
).setRequestMethod("POST");
175 conn
.setRequestProperty("Content-Type",
176 "application/x-www-form-urlencoded");
177 conn
.setRequestProperty("charset", "utf-8");
181 conn
.setRequestProperty("Authorization", oauth
);
184 if (requestData
!= null) {
185 OutputStreamWriter writer
= new OutputStreamWriter(
186 conn
.getOutputStream());
188 writer
.write(requestData
.toString());
197 if (conn
instanceof HttpURLConnection
198 && ((HttpURLConnection
) conn
).getResponseCode() / 100 == 3) {
199 String newUrl
= conn
.getHeaderField("Location");
200 return open(new URL(newUrl
), originalUrl
, currentReferer
,
201 cookiesValues
, postParams
, getParams
, oauth
);
204 InputStream in
= conn
.getInputStream();
205 if ("gzip".equals(conn
.getContentEncoding())) {
206 in
= new GZIPInputStream(in
);
213 * Open a connection on the given {@link URL}, and manage the cookies that
217 * the {@link URL} to open
219 * @return the connection
221 * @throws IOException
222 * in case of I/O error
224 private URLConnection
openConnectionWithCookies(URL url
,
225 URL currentReferer
, Map
<String
, String
> cookiesValues
)
227 URLConnection conn
= url
.openConnection();
229 conn
.setRequestProperty("User-Agent", UA
);
230 conn
.setRequestProperty("Cookie", generateCookies(cookiesValues
));
231 conn
.setRequestProperty("Accept-Encoding", "gzip");
232 if (currentReferer
!= null) {
233 conn
.setRequestProperty("Referer", currentReferer
.toString());
234 conn
.setRequestProperty("Host", currentReferer
.getHost());
241 * Generate the cookie {@link String} from the local {@link CookieStore} so
242 * it is ready to be passed.
246 private String
generateCookies(Map
<String
, String
> cookiesValues
) {
247 StringBuilder builder
= new StringBuilder();
248 for (HttpCookie cookie
: cookies
.getCookieStore().getCookies()) {
249 if (builder
.length() > 0) {
253 // TODO: check if format is ok
254 builder
.append(cookie
.toString());
257 if (cookiesValues
!= null) {
258 for (Map
.Entry
<String
, String
> set
: cookiesValues
.entrySet()) {
259 if (builder
.length() > 0) {
262 builder
.append(set
.getKey());
264 builder
.append(set
.getValue());
268 return builder
.toString();