Version 3.1.6: fix Bridge, Serialiser, Progress:
[fanfix.git] / src / be / nikiroo / utils / Downloader.java
CommitLineData
8816d2f7
NR
1package be.nikiroo.utils;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.io.OutputStreamWriter;
6import java.net.CookieHandler;
7import java.net.CookieManager;
8import java.net.CookiePolicy;
9import java.net.CookieStore;
10import java.net.HttpCookie;
11import java.net.HttpURLConnection;
12import java.net.URL;
13import java.net.URLConnection;
14import java.net.URLEncoder;
15import java.util.Map;
16import java.util.zip.GZIPInputStream;
17
18/**
19 * This class will help you download content from Internet Sites ({@link URL}
20 * based).
21 * <p>
22 * It allows you to control some options often required on web sites that do not
23 * want to simply serve HTML, but actively makes your life difficult with stupid
24 * checks.
25 *
26 * @author niki
27 */
28public class Downloader {
29 private String UA;
30 private CookieManager cookies;
530d4062 31 private TraceHandler tracer = new TraceHandler();
8816d2f7
NR
32
33 /**
34 * Create a new {@link Downloader}.
35 *
36 * @param UA
37 * the User-Agent to use to download the resources -- note that
38 * some websites require one, some actively blacklist real UAs
39 * like the one from wget, some whitelist a couple of browsers
40 * only (!)
41 */
42 public Downloader(String UA) {
43 this.UA = UA;
44
45 cookies = new CookieManager();
46 cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
47 CookieHandler.setDefault(cookies);
48 }
49
530d4062
NR
50 /**
51 * The traces handler for this {@link Cache}.
52 *
53 * @return the traces handler
54 */
55 public TraceHandler getTraceHandler() {
56 return tracer;
57 }
58
59 /**
60 * The traces handler for this {@link Cache}.
61 *
62 * @param tracer
63 * the new traces handler
64 */
65 public void setTraceHandler(TraceHandler tracer) {
66 this.tracer = tracer;
67 }
68
8816d2f7
NR
69 /**
70 * Clear all the cookies currently in the jar.
71 * <p>
72 * As long as you don't, the cookies are kept.
73 */
74 public void clearCookies() {
75 cookies.getCookieStore().removeAll();
76 }
77
78 /**
79 * Open the given {@link URL} and update the cookies.
80 *
81 * @param url
82 * the {@link URL} to open
83 * @return the {@link InputStream} of the opened page
84 *
85 * @throws IOException
86 * in case of I/O error
87 **/
88 public InputStream open(URL url) throws IOException {
89 return open(url, url, url, null, null, null, null);
90 }
91
92 /**
93 * Open the given {@link URL} and update the cookies.
94 *
95 * @param url
96 * the {@link URL} to open
530d4062
NR
97 * @param currentReferer
98 * the current referer, for websites that needs this info
99 * @param cookiesValues
100 * the cookies
8816d2f7
NR
101 * @param postParams
102 * the POST parameters
103 * @param getParams
104 * the GET parameters (priority over POST)
105 * @param oauth
106 * OAuth authorization (aka, "bearer XXXXXXX")
107 *
108 * @return the {@link InputStream} of the opened page
109 *
110 * @throws IOException
111 * in case of I/O error
112 */
113 public InputStream open(URL url, URL currentReferer,
114 Map<String, String> cookiesValues, Map<String, String> postParams,
115 Map<String, String> getParams, String oauth) throws IOException {
116 return open(url, url, currentReferer, cookiesValues, postParams,
117 getParams, oauth);
118 }
119
8816d2f7
NR
120 /**
121 * Open the given {@link URL} and update the cookies.
122 *
123 * @param url
124 * the {@link URL} to open
125 * @param originalUrl
126 * the original {@link URL} before any redirection occurs
127 * @param postParams
128 * the POST parameters
129 * @param getParams
130 * the GET parameters (priority over POST)
131 * @param oauth
132 * OAuth authorisation (aka, "bearer XXXXXXX")
133 * @return the {@link InputStream} of the opened page
134 *
135 * @throws IOException
136 * in case of I/O error
137 */
138 private InputStream open(URL url, final URL originalUrl,
139 URL currentReferer, Map<String, String> cookiesValues,
140 Map<String, String> postParams, Map<String, String> getParams,
141 String oauth) throws IOException {
142
530d4062 143 tracer.trace("Download: " + url);
8816d2f7
NR
144
145 URLConnection conn = openConnectionWithCookies(url, currentReferer,
146 cookiesValues);
147
148 // Priority: GET over POST
149 Map<String, String> params = getParams;
150 if (getParams == null) {
151 params = postParams;
152 }
153
154 if ((params != null || oauth != null)
155 && conn instanceof HttpURLConnection) {
156 StringBuilder requestData = null;
157 if (params != null) {
158 requestData = new StringBuilder();
159 for (Map.Entry<String, String> param : params.entrySet()) {
160 if (requestData.length() != 0)
161 requestData.append('&');
162 requestData.append(URLEncoder.encode(param.getKey(),
163 "UTF-8"));
164 requestData.append('=');
165 requestData.append(URLEncoder.encode(
166 String.valueOf(param.getValue()), "UTF-8"));
167 }
168
169 conn.setDoOutput(true);
170
171 if (getParams == null && postParams != null) {
172 ((HttpURLConnection) conn).setRequestMethod("POST");
173 }
174
175 conn.setRequestProperty("Content-Type",
176 "application/x-www-form-urlencoded");
177 conn.setRequestProperty("charset", "utf-8");
178 }
179
180 if (oauth != null) {
181 conn.setRequestProperty("Authorization", oauth);
182 }
183
184 if (requestData != null) {
185 OutputStreamWriter writer = new OutputStreamWriter(
186 conn.getOutputStream());
187
188 writer.write(requestData.toString());
189 writer.flush();
190 writer.close();
191 }
192 }
193
194 conn.connect();
195
196 // Check if redirect
197 if (conn instanceof HttpURLConnection
198 && ((HttpURLConnection) conn).getResponseCode() / 100 == 3) {
199 String newUrl = conn.getHeaderField("Location");
200 return open(new URL(newUrl), originalUrl, currentReferer,
201 cookiesValues, postParams, getParams, oauth);
202 }
203
204 InputStream in = conn.getInputStream();
205 if ("gzip".equals(conn.getContentEncoding())) {
206 in = new GZIPInputStream(in);
207 }
208
209 return in;
210 }
211
212 /**
213 * Open a connection on the given {@link URL}, and manage the cookies that
214 * come with it.
215 *
216 * @param url
217 * the {@link URL} to open
218 *
219 * @return the connection
220 *
221 * @throws IOException
222 * in case of I/O error
223 */
224 private URLConnection openConnectionWithCookies(URL url,
225 URL currentReferer, Map<String, String> cookiesValues)
226 throws IOException {
227 URLConnection conn = url.openConnection();
228
229 conn.setRequestProperty("User-Agent", UA);
230 conn.setRequestProperty("Cookie", generateCookies(cookiesValues));
231 conn.setRequestProperty("Accept-Encoding", "gzip");
232 if (currentReferer != null) {
233 conn.setRequestProperty("Referer", currentReferer.toString());
234 conn.setRequestProperty("Host", currentReferer.getHost());
235 }
236
237 return conn;
238 }
239
240 /**
241 * Generate the cookie {@link String} from the local {@link CookieStore} so
242 * it is ready to be passed.
243 *
244 * @return the cookie
245 */
246 private String generateCookies(Map<String, String> cookiesValues) {
247 StringBuilder builder = new StringBuilder();
248 for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
249 if (builder.length() > 0) {
250 builder.append(';');
251 }
252
253 // TODO: check if format is ok
254 builder.append(cookie.toString());
255 }
256
257 if (cookiesValues != null) {
258 for (Map.Entry<String, String> set : cookiesValues.entrySet()) {
259 if (builder.length() > 0) {
260 builder.append(';');
261 }
262 builder.append(set.getKey());
263 builder.append('=');
264 builder.append(set.getValue());
265 }
266 }
267
268 return builder.toString();
269 }
270}