Fix SerialTest :
[fanfix.git] / src / be / nikiroo / utils / Downloader.java
CommitLineData
8816d2f7
NR
1package be.nikiroo.utils;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.io.OutputStreamWriter;
6import java.net.CookieHandler;
7import java.net.CookieManager;
8import java.net.CookiePolicy;
9import java.net.CookieStore;
10import java.net.HttpCookie;
11import java.net.HttpURLConnection;
12import java.net.URL;
13import java.net.URLConnection;
14import java.net.URLEncoder;
15import java.util.Map;
16import java.util.zip.GZIPInputStream;
17
18/**
19 * This class will help you download content from Internet Sites ({@link URL}
20 * based).
21 * <p>
22 * It allows you to control some options often required on web sites that do not
23 * want to simply serve HTML, but actively makes your life difficult with stupid
24 * checks.
25 *
26 * @author niki
27 */
28public class Downloader {
29 private String UA;
30 private CookieManager cookies;
530d4062 31 private TraceHandler tracer = new TraceHandler();
8816d2f7
NR
32
33 /**
34 * Create a new {@link Downloader}.
35 *
36 * @param UA
37 * the User-Agent to use to download the resources -- note that
38 * some websites require one, some actively blacklist real UAs
39 * like the one from wget, some whitelist a couple of browsers
40 * only (!)
41 */
42 public Downloader(String UA) {
43 this.UA = UA;
44
45 cookies = new CookieManager();
46 cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
47 CookieHandler.setDefault(cookies);
48 }
49
530d4062
NR
50 /**
51 * The traces handler for this {@link Cache}.
52 *
53 * @return the traces handler
54 */
55 public TraceHandler getTraceHandler() {
56 return tracer;
57 }
58
59 /**
60 * The traces handler for this {@link Cache}.
61 *
62 * @param tracer
63 * the new traces handler
64 */
65 public void setTraceHandler(TraceHandler tracer) {
80500544
NR
66 if (tracer == null) {
67 tracer = new TraceHandler(false, false, false);
68 }
69
530d4062
NR
70 this.tracer = tracer;
71 }
72
8816d2f7
NR
73 /**
74 * Clear all the cookies currently in the jar.
75 * <p>
76 * As long as you don't, the cookies are kept.
77 */
78 public void clearCookies() {
79 cookies.getCookieStore().removeAll();
80 }
81
82 /**
83 * Open the given {@link URL} and update the cookies.
84 *
85 * @param url
86 * the {@link URL} to open
87 * @return the {@link InputStream} of the opened page
88 *
89 * @throws IOException
90 * in case of I/O error
91 **/
92 public InputStream open(URL url) throws IOException {
93 return open(url, url, url, null, null, null, null);
94 }
95
96 /**
97 * Open the given {@link URL} and update the cookies.
98 *
99 * @param url
100 * the {@link URL} to open
530d4062
NR
101 * @param currentReferer
102 * the current referer, for websites that needs this info
103 * @param cookiesValues
104 * the cookies
8816d2f7
NR
105 * @param postParams
106 * the POST parameters
107 * @param getParams
108 * the GET parameters (priority over POST)
109 * @param oauth
110 * OAuth authorization (aka, "bearer XXXXXXX")
111 *
112 * @return the {@link InputStream} of the opened page
113 *
114 * @throws IOException
115 * in case of I/O error
116 */
117 public InputStream open(URL url, URL currentReferer,
118 Map<String, String> cookiesValues, Map<String, String> postParams,
119 Map<String, String> getParams, String oauth) throws IOException {
120 return open(url, url, currentReferer, cookiesValues, postParams,
121 getParams, oauth);
122 }
123
8816d2f7
NR
124 /**
125 * Open the given {@link URL} and update the cookies.
126 *
127 * @param url
128 * the {@link URL} to open
129 * @param originalUrl
130 * the original {@link URL} before any redirection occurs
131 * @param postParams
132 * the POST parameters
133 * @param getParams
134 * the GET parameters (priority over POST)
135 * @param oauth
136 * OAuth authorisation (aka, "bearer XXXXXXX")
137 * @return the {@link InputStream} of the opened page
138 *
139 * @throws IOException
140 * in case of I/O error
141 */
142 private InputStream open(URL url, final URL originalUrl,
143 URL currentReferer, Map<String, String> cookiesValues,
144 Map<String, String> postParams, Map<String, String> getParams,
145 String oauth) throws IOException {
146
530d4062 147 tracer.trace("Download: " + url);
8816d2f7
NR
148
149 URLConnection conn = openConnectionWithCookies(url, currentReferer,
150 cookiesValues);
151
152 // Priority: GET over POST
153 Map<String, String> params = getParams;
154 if (getParams == null) {
155 params = postParams;
156 }
157
158 if ((params != null || oauth != null)
159 && conn instanceof HttpURLConnection) {
160 StringBuilder requestData = null;
161 if (params != null) {
162 requestData = new StringBuilder();
163 for (Map.Entry<String, String> param : params.entrySet()) {
164 if (requestData.length() != 0)
165 requestData.append('&');
166 requestData.append(URLEncoder.encode(param.getKey(),
167 "UTF-8"));
168 requestData.append('=');
169 requestData.append(URLEncoder.encode(
170 String.valueOf(param.getValue()), "UTF-8"));
171 }
172
173 conn.setDoOutput(true);
174
175 if (getParams == null && postParams != null) {
176 ((HttpURLConnection) conn).setRequestMethod("POST");
177 }
178
179 conn.setRequestProperty("Content-Type",
180 "application/x-www-form-urlencoded");
181 conn.setRequestProperty("charset", "utf-8");
182 }
183
184 if (oauth != null) {
185 conn.setRequestProperty("Authorization", oauth);
186 }
187
188 if (requestData != null) {
0988831f
NR
189 OutputStreamWriter writer = null;
190 try {
191 writer = new OutputStreamWriter(conn.getOutputStream());
192 writer.write(requestData.toString());
193 writer.flush();
194 } finally {
195 if (writer != null) {
196 writer.close();
197 }
198 }
8816d2f7
NR
199 }
200 }
201
202 conn.connect();
203
204 // Check if redirect
6149689f
NR
205 if (conn instanceof HttpURLConnection) {
206 int repCode = 0;
207 try {
208 // Can fail in some circumstances
209 repCode = ((HttpURLConnection) conn).getResponseCode();
210 } catch (IOException e) {
211 }
212
213 if (repCode / 100 == 3) {
214 String newUrl = conn.getHeaderField("Location");
215 return open(new URL(newUrl), originalUrl, currentReferer,
216 cookiesValues, postParams, getParams, oauth);
217 }
8816d2f7
NR
218 }
219
220 InputStream in = conn.getInputStream();
221 if ("gzip".equals(conn.getContentEncoding())) {
222 in = new GZIPInputStream(in);
223 }
224
225 return in;
226 }
227
228 /**
229 * Open a connection on the given {@link URL}, and manage the cookies that
230 * come with it.
231 *
232 * @param url
233 * the {@link URL} to open
234 *
235 * @return the connection
236 *
237 * @throws IOException
238 * in case of I/O error
239 */
240 private URLConnection openConnectionWithCookies(URL url,
241 URL currentReferer, Map<String, String> cookiesValues)
242 throws IOException {
243 URLConnection conn = url.openConnection();
244
245 conn.setRequestProperty("User-Agent", UA);
246 conn.setRequestProperty("Cookie", generateCookies(cookiesValues));
247 conn.setRequestProperty("Accept-Encoding", "gzip");
248 if (currentReferer != null) {
249 conn.setRequestProperty("Referer", currentReferer.toString());
250 conn.setRequestProperty("Host", currentReferer.getHost());
251 }
252
253 return conn;
254 }
255
256 /**
257 * Generate the cookie {@link String} from the local {@link CookieStore} so
258 * it is ready to be passed.
259 *
260 * @return the cookie
261 */
262 private String generateCookies(Map<String, String> cookiesValues) {
263 StringBuilder builder = new StringBuilder();
264 for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
265 if (builder.length() > 0) {
266 builder.append(';');
267 }
268
269 // TODO: check if format is ok
270 builder.append(cookie.toString());
271 }
272
273 if (cookiesValues != null) {
274 for (Map.Entry<String, String> set : cookiesValues.entrySet()) {
275 if (builder.length() > 0) {
276 builder.append(';');
277 }
278 builder.append(set.getKey());
279 builder.append('=');
280 builder.append(set.getValue());
281 }
282 }
283
284 return builder.toString();
285 }
286}