package be.nikiroo.utils;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.CookieStore;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Map;
import java.util.zip.GZIPInputStream;

/**
 * This class will help you download content from Internet Sites ({@link URL}
 * based).
 * <p>
 * It allows you to control some options often required on web sites that do not
 * want to simply serve HTML, but actively makes your life difficult with stupid
 * checks: a custom User-Agent, a referer, a cookie jar, extra cookies, form
 * parameters and an <tt>Authorization</tt> header.
 */
public class Downloader {
    /**
     * Maximum number of redirections followed by this class for a single
     * download, so that a redirection loop cannot run forever.
     */
    private static final int MAX_REDIRECTS = 20;

    /** The User-Agent sent with every request. */
    private final String UA;
    /** The cookie jar shared by all the requests made by this instance. */
    private final CookieManager cookies;

    /**
     * Create a new {@link Downloader}.
     * <p>
     * Note that the cookie jar of this instance is also installed as the
     * default {@link CookieHandler}, which is a process-wide setting.
     *
     * @param UA
     *            the User-Agent to use to download the resources -- note that
     *            some websites require one, some actively blacklist real UAs
     *            like the one from wget, some whitelist a couple of browsers
     *            only (!)
     */
    public Downloader(String UA) {
        this.UA = UA;

        cookies = new CookieManager();
        cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
        CookieHandler.setDefault(cookies);
    }

    /**
     * Clear all the cookies currently in the jar.
     * <p>
     * As long as you don't, the cookies are kept.
     */
    public void clearCookies() {
        cookies.getCookieStore().removeAll();
    }

    /**
     * Open the given {@link URL} and update the cookies.
     * <p>
     * The {@link URL} itself is used as referer; no extra cookies, parameters
     * or authorization are sent.
     *
     * @param url
     *            the {@link URL} to open
     * @return the {@link InputStream} of the opened page
     *
     * @throws IOException
     *             in case of I/O error
     */
    public InputStream open(URL url) throws IOException {
        return open(url, url, url, null, null, null, null);
    }

    /**
     * Open the given {@link URL} and update the cookies.
     *
     * @param url
     *            the {@link URL} to open
     * @param currentReferer
     *            the {@link URL} to send as referer (can be NULL)
     * @param cookiesValues
     *            extra cookies to send on top of the ones in the jar (can be
     *            NULL)
     * @param postParams
     *            the POST parameters (can be NULL)
     * @param getParams
     *            the GET parameters (priority over POST) (can be NULL)
     * @param oauth
     *            OAuth authorization (aka, "bearer XXXXXXX") (can be NULL)
     *
     * @return the {@link InputStream} of the opened page
     *
     * @throws IOException
     *             in case of I/O error
     */
    public InputStream open(URL url, URL currentReferer,
            Map<String, String> cookiesValues, Map<String, String> postParams,
            Map<String, String> getParams, String oauth) throws IOException {
        return open(url, url, currentReferer, cookiesValues, postParams,
                getParams, oauth);
    }

    /**
     * Trace information (info/error) generated by this class.
     * <p>
     * You can override it if you don't want the default sysout/syserr.
     *
     * @param message
     *            the message to trace
     * @param error
     *            TRUE for error messages, FALSE for information messages
     */
    protected void trace(String message, boolean error) {
        if (error) {
            System.err.println(message);
        } else {
            System.out.println(message);
        }
    }

    /**
     * Open the given {@link URL} and update the cookies, following the
     * redirections (at most {@link #MAX_REDIRECTS} of them).
     * <p>
     * The same referer, cookies, parameters and authorization are sent again
     * on each redirection.
     *
     * @param url
     *            the {@link URL} to open
     * @param originalUrl
     *            the original {@link URL} before any redirection occurs (only
     *            used in error messages)
     * @param currentReferer
     *            the {@link URL} to send as referer (can be NULL)
     * @param cookiesValues
     *            extra cookies to send (can be NULL)
     * @param postParams
     *            the POST parameters
     * @param getParams
     *            the GET parameters (priority over POST)
     * @param oauth
     *            OAuth authorisation (aka, "bearer XXXXXXX")
     * @return the {@link InputStream} of the opened page
     *
     * @throws IOException
     *             in case of I/O error, or if too many redirections occur
     */
    private InputStream open(URL url, final URL originalUrl,
            URL currentReferer, Map<String, String> cookiesValues,
            Map<String, String> postParams, Map<String, String> getParams,
            String oauth) throws IOException {

        URL current = url;
        for (int hops = 0; hops <= MAX_REDIRECTS; hops++) {
            trace("Download: " + current, false);

            URLConnection conn = openConnectionWithCookies(current,
                    currentReferer, cookiesValues);
            sendRequestData(conn, postParams, getParams, oauth);
            conn.connect();

            URL target = getRedirectTarget(conn, current);
            if (target == null) {
                return openBody(conn);
            }

            // We will not read this response: release it before moving on
            ((HttpURLConnection) conn).disconnect();
            current = target;
        }

        throw new IOException("Too many redirections (more than "
                + MAX_REDIRECTS + ") while downloading: " + originalUrl);
    }

    /**
     * Configure the request (method, headers) and write the form parameters,
     * if any, on the given connection.
     * <p>
     * Nothing is done for connections that are not HTTP(S) ones.
     * <p>
     * NOTE(review): the GET parameters are written to the request body exactly
     * like the POST ones (only the explicit "POST" method is skipped); they are
     * not appended to the query string of the {@link URL} -- confirm this is
     * what the callers expect.
     *
     * @param conn
     *            the (not yet connected) connection
     * @param postParams
     *            the POST parameters (can be NULL)
     * @param getParams
     *            the GET parameters (priority over POST) (can be NULL)
     * @param oauth
     *            the value of the Authorization header (can be NULL)
     *
     * @throws IOException
     *             in case of I/O error
     */
    private static void sendRequestData(URLConnection conn,
            Map<String, String> postParams, Map<String, String> getParams,
            String oauth) throws IOException {
        // Priority: GET over POST
        Map<String, String> params = getParams;
        if (getParams == null) {
            params = postParams;
        }

        if ((params == null && oauth == null)
                || !(conn instanceof HttpURLConnection)) {
            return;
        }

        String requestData = null;
        if (params != null) {
            requestData = encodeParams(params);

            conn.setDoOutput(true);

            if (getParams == null && postParams != null) {
                ((HttpURLConnection) conn).setRequestMethod("POST");
            }

            conn.setRequestProperty("Content-Type",
                    "application/x-www-form-urlencoded");
            conn.setRequestProperty("charset", "utf-8");
        }

        if (oauth != null) {
            conn.setRequestProperty("Authorization", oauth);
        }

        if (requestData != null) {
            // Explicit charset: do not depend on the platform default
            OutputStreamWriter writer = new OutputStreamWriter(
                    conn.getOutputStream(), "UTF-8");
            try {
                writer.write(requestData);
                writer.flush();
            } finally {
                // Always release the stream, even if the write failed
                writer.close();
            }
        }
    }

    /**
     * Encode the given parameters as an
     * <tt>application/x-www-form-urlencoded</tt> {@link String}
     * (<tt>key1=value1&amp;key2=value2</tt>, URL-encoded in UTF-8).
     *
     * @param params
     *            the parameters to encode (a NULL value is encoded as the
     *            text "null")
     *
     * @return the encoded parameters
     *
     * @throws IOException
     *             if the UTF-8 encoding is not supported
     */
    private static String encodeParams(Map<String, String> params)
            throws IOException {
        StringBuilder requestData = new StringBuilder();
        for (Map.Entry<String, String> param : params.entrySet()) {
            if (requestData.length() != 0) {
                requestData.append('&');
            }
            requestData.append(URLEncoder.encode(param.getKey(), "UTF-8"));
            requestData.append('=');
            requestData.append(URLEncoder.encode(
                    String.valueOf(param.getValue()), "UTF-8"));
        }

        return requestData.toString();
    }

    /**
     * Check if the response of the given connection is an HTTP redirection,
     * and return its target if so.
     *
     * @param conn
     *            the connected connection
     * @param current
     *            the {@link URL} that was opened, used to resolve relative
     *            <tt>Location</tt> values
     *
     * @return the {@link URL} to follow, or NULL if the response is not a
     *         redirection (this includes 3xx responses without a
     *         <tt>Location</tt> header, like 304)
     *
     * @throws IOException
     *             in case of I/O error or of invalid <tt>Location</tt>
     */
    private static URL getRedirectTarget(URLConnection conn, URL current)
            throws IOException {
        if (!(conn instanceof HttpURLConnection)) {
            return null;
        }

        if (((HttpURLConnection) conn).getResponseCode() / 100 != 3) {
            return null;
        }

        String location = conn.getHeaderField("Location");
        if (location == null) {
            return null;
        }

        // The Location can be relative to the URL we just opened
        return new URL(current, location);
    }

    /**
     * Open the body of the response of the given connection, transparently
     * decompressing it if the server declared it as gzip-encoded.
     *
     * @param conn
     *            the connected connection
     *
     * @return the (decompressed) body
     *
     * @throws IOException
     *             in case of I/O error
     */
    private static InputStream openBody(URLConnection conn)
            throws IOException {
        InputStream in = conn.getInputStream();
        if ("gzip".equals(conn.getContentEncoding())) {
            try {
                in = new GZIPInputStream(in);
            } catch (IOException e) {
                // Do not leak the raw stream if the gzip header is invalid
                in.close();
                throw e;
            }
        }

        return in;
    }

    /**
     * Open a connection on the given {@link URL}, and manage the cookies that
     * come with it.
     * <p>
     * The connection is configured (User-Agent, cookies, gzip support,
     * referer) but not yet connected.
     *
     * @param url
     *            the {@link URL} to open
     * @param currentReferer
     *            the {@link URL} to send as referer (can be NULL)
     * @param cookiesValues
     *            extra cookies to send (can be NULL)
     *
     * @return the connection
     *
     * @throws IOException
     *             in case of I/O error
     */
    private URLConnection openConnectionWithCookies(URL url,
            URL currentReferer, Map<String, String> cookiesValues)
            throws IOException {
        URLConnection conn = url.openConnection();

        conn.setRequestProperty("User-Agent", UA);

        // An empty Cookie header is not valid: only send it if needed
        String cookieHeader = generateCookies(cookiesValues);
        if (cookieHeader.length() > 0) {
            conn.setRequestProperty("Cookie", cookieHeader);
        }

        conn.setRequestProperty("Accept-Encoding", "gzip");

        // The Host header is deliberately not forced here: it must name the
        // server we contact (not the referer), and is derived from the URL.
        if (currentReferer != null) {
            conn.setRequestProperty("Referer", currentReferer.toString());
        }

        return conn;
    }

    /**
     * Generate the cookie {@link String} from the local {@link CookieStore} so
     * it is ready to be passed.
     * <p>
     * The cookies from the jar come first, then the given extra ones, all of
     * them separated by "; ".
     * <p>
     * NOTE(review): all the cookies of the jar are used, whatever the domain
     * they were received from.
     *
     * @param cookiesValues
     *            extra cookies (name/value) to add (can be NULL)
     *
     * @return the cookie {@link String} (empty if there are no cookies)
     */
    private String generateCookies(Map<String, String> cookiesValues) {
        StringBuilder builder = new StringBuilder();
        for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
            if (builder.length() > 0) {
                builder.append("; ");
            }

            // TODO: check if format is ok
            builder.append(cookie.toString());
        }

        if (cookiesValues != null) {
            for (Map.Entry<String, String> set : cookiesValues.entrySet()) {
                if (builder.length() > 0) {
                    builder.append("; ");
                }
                builder.append(set.getKey());
                builder.append('=');
                builder.append(set.getValue());
            }
        }

        return builder.toString();
    }
}