Version 4.0.0: java.awt dependencies move
[nikiroo-utils.git] / src / be / nikiroo / utils / Downloader.java
CommitLineData
8816d2f7
NR
1package be.nikiroo.utils;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.io.OutputStreamWriter;
6import java.net.CookieHandler;
7import java.net.CookieManager;
8import java.net.CookiePolicy;
9import java.net.CookieStore;
10import java.net.HttpCookie;
11import java.net.HttpURLConnection;
12import java.net.URL;
13import java.net.URLConnection;
14import java.net.URLEncoder;
15import java.util.Map;
16import java.util.zip.GZIPInputStream;
17
18/**
19 * This class will help you download content from Internet Sites ({@link URL}
20 * based).
21 * <p>
22 * It allows you to control some options often required on web sites that do not
23 * want to simply serve HTML, but actively makes your life difficult with stupid
24 * checks.
25 *
26 * @author niki
27 */
28public class Downloader {
29 private String UA;
30 private CookieManager cookies;
530d4062 31 private TraceHandler tracer = new TraceHandler();
8816d2f7
NR
32
33 /**
34 * Create a new {@link Downloader}.
35 *
36 * @param UA
37 * the User-Agent to use to download the resources -- note that
38 * some websites require one, some actively blacklist real UAs
39 * like the one from wget, some whitelist a couple of browsers
40 * only (!)
41 */
42 public Downloader(String UA) {
43 this.UA = UA;
44
45 cookies = new CookieManager();
46 cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
47 CookieHandler.setDefault(cookies);
48 }
49
530d4062
NR
50 /**
51 * The traces handler for this {@link Cache}.
52 *
53 * @return the traces handler
54 */
55 public TraceHandler getTraceHandler() {
56 return tracer;
57 }
58
59 /**
60 * The traces handler for this {@link Cache}.
61 *
62 * @param tracer
63 * the new traces handler
64 */
65 public void setTraceHandler(TraceHandler tracer) {
80500544
NR
66 if (tracer == null) {
67 tracer = new TraceHandler(false, false, false);
68 }
69
530d4062
NR
70 this.tracer = tracer;
71 }
72
8816d2f7
NR
73 /**
74 * Clear all the cookies currently in the jar.
75 * <p>
76 * As long as you don't, the cookies are kept.
77 */
78 public void clearCookies() {
79 cookies.getCookieStore().removeAll();
80 }
81
82 /**
83 * Open the given {@link URL} and update the cookies.
84 *
85 * @param url
86 * the {@link URL} to open
87 * @return the {@link InputStream} of the opened page
88 *
89 * @throws IOException
90 * in case of I/O error
91 **/
92 public InputStream open(URL url) throws IOException {
93 return open(url, url, url, null, null, null, null);
94 }
95
96 /**
97 * Open the given {@link URL} and update the cookies.
98 *
99 * @param url
100 * the {@link URL} to open
530d4062
NR
101 * @param currentReferer
102 * the current referer, for websites that needs this info
103 * @param cookiesValues
104 * the cookies
8816d2f7
NR
105 * @param postParams
106 * the POST parameters
107 * @param getParams
108 * the GET parameters (priority over POST)
109 * @param oauth
110 * OAuth authorization (aka, "bearer XXXXXXX")
111 *
112 * @return the {@link InputStream} of the opened page
113 *
114 * @throws IOException
115 * in case of I/O error
116 */
117 public InputStream open(URL url, URL currentReferer,
118 Map<String, String> cookiesValues, Map<String, String> postParams,
119 Map<String, String> getParams, String oauth) throws IOException {
120 return open(url, url, currentReferer, cookiesValues, postParams,
121 getParams, oauth);
122 }
123
8816d2f7
NR
124 /**
125 * Open the given {@link URL} and update the cookies.
126 *
127 * @param url
128 * the {@link URL} to open
129 * @param originalUrl
130 * the original {@link URL} before any redirection occurs
131 * @param postParams
132 * the POST parameters
133 * @param getParams
134 * the GET parameters (priority over POST)
135 * @param oauth
136 * OAuth authorisation (aka, "bearer XXXXXXX")
137 * @return the {@link InputStream} of the opened page
138 *
139 * @throws IOException
140 * in case of I/O error
141 */
142 private InputStream open(URL url, final URL originalUrl,
143 URL currentReferer, Map<String, String> cookiesValues,
144 Map<String, String> postParams, Map<String, String> getParams,
145 String oauth) throws IOException {
146
530d4062 147 tracer.trace("Download: " + url);
8816d2f7
NR
148
149 URLConnection conn = openConnectionWithCookies(url, currentReferer,
150 cookiesValues);
151
152 // Priority: GET over POST
153 Map<String, String> params = getParams;
154 if (getParams == null) {
155 params = postParams;
156 }
157
158 if ((params != null || oauth != null)
159 && conn instanceof HttpURLConnection) {
160 StringBuilder requestData = null;
161 if (params != null) {
162 requestData = new StringBuilder();
163 for (Map.Entry<String, String> param : params.entrySet()) {
164 if (requestData.length() != 0)
165 requestData.append('&');
166 requestData.append(URLEncoder.encode(param.getKey(),
167 "UTF-8"));
168 requestData.append('=');
169 requestData.append(URLEncoder.encode(
170 String.valueOf(param.getValue()), "UTF-8"));
171 }
172
173 conn.setDoOutput(true);
174
175 if (getParams == null && postParams != null) {
176 ((HttpURLConnection) conn).setRequestMethod("POST");
177 }
178
179 conn.setRequestProperty("Content-Type",
180 "application/x-www-form-urlencoded");
181 conn.setRequestProperty("charset", "utf-8");
182 }
183
184 if (oauth != null) {
185 conn.setRequestProperty("Authorization", oauth);
186 }
187
188 if (requestData != null) {
189 OutputStreamWriter writer = new OutputStreamWriter(
190 conn.getOutputStream());
191
192 writer.write(requestData.toString());
193 writer.flush();
194 writer.close();
195 }
196 }
197
198 conn.connect();
199
200 // Check if redirect
201 if (conn instanceof HttpURLConnection
202 && ((HttpURLConnection) conn).getResponseCode() / 100 == 3) {
203 String newUrl = conn.getHeaderField("Location");
204 return open(new URL(newUrl), originalUrl, currentReferer,
205 cookiesValues, postParams, getParams, oauth);
206 }
207
208 InputStream in = conn.getInputStream();
209 if ("gzip".equals(conn.getContentEncoding())) {
210 in = new GZIPInputStream(in);
211 }
212
213 return in;
214 }
215
216 /**
217 * Open a connection on the given {@link URL}, and manage the cookies that
218 * come with it.
219 *
220 * @param url
221 * the {@link URL} to open
222 *
223 * @return the connection
224 *
225 * @throws IOException
226 * in case of I/O error
227 */
228 private URLConnection openConnectionWithCookies(URL url,
229 URL currentReferer, Map<String, String> cookiesValues)
230 throws IOException {
231 URLConnection conn = url.openConnection();
232
233 conn.setRequestProperty("User-Agent", UA);
234 conn.setRequestProperty("Cookie", generateCookies(cookiesValues));
235 conn.setRequestProperty("Accept-Encoding", "gzip");
236 if (currentReferer != null) {
237 conn.setRequestProperty("Referer", currentReferer.toString());
238 conn.setRequestProperty("Host", currentReferer.getHost());
239 }
240
241 return conn;
242 }
243
244 /**
245 * Generate the cookie {@link String} from the local {@link CookieStore} so
246 * it is ready to be passed.
247 *
248 * @return the cookie
249 */
250 private String generateCookies(Map<String, String> cookiesValues) {
251 StringBuilder builder = new StringBuilder();
252 for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
253 if (builder.length() > 0) {
254 builder.append(';');
255 }
256
257 // TODO: check if format is ok
258 builder.append(cookie.toString());
259 }
260
261 if (cookiesValues != null) {
262 for (Map.Entry<String, String> set : cookiesValues.entrySet()) {
263 if (builder.length() > 0) {
264 builder.append(';');
265 }
266 builder.append(set.getKey());
267 builder.append('=');
268 builder.append(set.getValue());
269 }
270 }
271
272 return builder.toString();
273 }
274}