package be.nikiroo.utils;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.CookieStore;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Map;
import java.util.zip.GZIPInputStream;

19 * This class will help you download content from Internet Sites ({@link URL}
22 * It allows you to control some options often required on web sites that do not
23 * want to simply serve HTML, but actively makes your life difficult with stupid
28 public class Downloader
{
30 private CookieManager cookies
;
31 private TraceHandler tracer
= new TraceHandler();
34 * Create a new {@link Downloader}.
37 * the User-Agent to use to download the resources -- note that
38 * some websites require one, some actively blacklist real UAs
39 * like the one from wget, some whitelist a couple of browsers
42 public Downloader(String UA
) {
45 cookies
= new CookieManager();
46 cookies
.setCookiePolicy(CookiePolicy
.ACCEPT_ALL
);
47 CookieHandler
.setDefault(cookies
);
51 * The traces handler for this {@link Cache}.
53 * @return the traces handler
55 public TraceHandler
getTraceHandler() {
60 * The traces handler for this {@link Cache}.
63 * the new traces handler
65 public void setTraceHandler(TraceHandler tracer
) {
67 tracer
= new TraceHandler(false, false, false);
74 * Clear all the cookies currently in the jar.
76 * As long as you don't, the cookies are kept.
78 public void clearCookies() {
79 cookies
.getCookieStore().removeAll();
83 * Open the given {@link URL} and update the cookies.
86 * the {@link URL} to open
87 * @return the {@link InputStream} of the opened page
90 * in case of I/O error
92 public InputStream
open(URL url
) throws IOException
{
93 return open(url
, url
, url
, null, null, null, null);
97 * Open the given {@link URL} and update the cookies.
100 * the {@link URL} to open
101 * @param currentReferer
102 * the current referer, for websites that needs this info
103 * @param cookiesValues
106 * the POST parameters
108 * the GET parameters (priority over POST)
110 * OAuth authorization (aka, "bearer XXXXXXX")
112 * @return the {@link InputStream} of the opened page
114 * @throws IOException
115 * in case of I/O error
117 public InputStream
open(URL url
, URL currentReferer
,
118 Map
<String
, String
> cookiesValues
, Map
<String
, String
> postParams
,
119 Map
<String
, String
> getParams
, String oauth
) throws IOException
{
120 return open(url
, url
, currentReferer
, cookiesValues
, postParams
,
125 * Open the given {@link URL} and update the cookies.
128 * the {@link URL} to open
130 * the original {@link URL} before any redirection occurs
132 * the POST parameters
134 * the GET parameters (priority over POST)
136 * OAuth authorisation (aka, "bearer XXXXXXX")
137 * @return the {@link InputStream} of the opened page
139 * @throws IOException
140 * in case of I/O error
142 private InputStream
open(URL url
, final URL originalUrl
,
143 URL currentReferer
, Map
<String
, String
> cookiesValues
,
144 Map
<String
, String
> postParams
, Map
<String
, String
> getParams
,
145 String oauth
) throws IOException
{
147 tracer
.trace("Download: " + url
);
149 URLConnection conn
= openConnectionWithCookies(url
, currentReferer
,
152 // Priority: GET over POST
153 Map
<String
, String
> params
= getParams
;
154 if (getParams
== null) {
158 if ((params
!= null || oauth
!= null)
159 && conn
instanceof HttpURLConnection
) {
160 StringBuilder requestData
= null;
161 if (params
!= null) {
162 requestData
= new StringBuilder();
163 for (Map
.Entry
<String
, String
> param
: params
.entrySet()) {
164 if (requestData
.length() != 0)
165 requestData
.append('&');
166 requestData
.append(URLEncoder
.encode(param
.getKey(),
168 requestData
.append('=');
169 requestData
.append(URLEncoder
.encode(
170 String
.valueOf(param
.getValue()), "UTF-8"));
173 conn
.setDoOutput(true);
175 if (getParams
== null && postParams
!= null) {
176 ((HttpURLConnection
) conn
).setRequestMethod("POST");
179 conn
.setRequestProperty("Content-Type",
180 "application/x-www-form-urlencoded");
181 conn
.setRequestProperty("charset", "utf-8");
185 conn
.setRequestProperty("Authorization", oauth
);
188 if (requestData
!= null) {
189 OutputStreamWriter writer
= null;
191 writer
= new OutputStreamWriter(conn
.getOutputStream());
192 writer
.write(requestData
.toString());
195 if (writer
!= null) {
205 if (conn
instanceof HttpURLConnection
) {
208 // Can fail in some circumstances
209 repCode
= ((HttpURLConnection
) conn
).getResponseCode();
210 } catch (IOException e
) {
213 if (repCode
/ 100 == 3) {
214 String newUrl
= conn
.getHeaderField("Location");
215 return open(new URL(newUrl
), originalUrl
, currentReferer
,
216 cookiesValues
, postParams
, getParams
, oauth
);
220 InputStream in
= conn
.getInputStream();
221 if ("gzip".equals(conn
.getContentEncoding())) {
222 in
= new GZIPInputStream(in
);
229 * Open a connection on the given {@link URL}, and manage the cookies that
233 * the {@link URL} to open
235 * @return the connection
237 * @throws IOException
238 * in case of I/O error
240 private URLConnection
openConnectionWithCookies(URL url
,
241 URL currentReferer
, Map
<String
, String
> cookiesValues
)
243 URLConnection conn
= url
.openConnection();
245 conn
.setRequestProperty("User-Agent", UA
);
246 conn
.setRequestProperty("Cookie", generateCookies(cookiesValues
));
247 conn
.setRequestProperty("Accept-Encoding", "gzip");
248 if (currentReferer
!= null) {
249 conn
.setRequestProperty("Referer", currentReferer
.toString());
250 conn
.setRequestProperty("Host", currentReferer
.getHost());
257 * Generate the cookie {@link String} from the local {@link CookieStore} so
258 * it is ready to be passed.
262 private String
generateCookies(Map
<String
, String
> cookiesValues
) {
263 StringBuilder builder
= new StringBuilder();
264 for (HttpCookie cookie
: cookies
.getCookieStore().getCookies()) {
265 if (builder
.length() > 0) {
269 // TODO: check if format is ok
270 builder
.append(cookie
.toString());
273 if (cookiesValues
!= null) {
274 for (Map
.Entry
<String
, String
> set
: cookiesValues
.entrySet()) {
275 if (builder
.length() > 0) {
278 builder
.append(set
.getKey());
280 builder
.append(set
.getValue());
284 return builder
.toString();