New: Downloader, Cache
[nikiroo-utils.git] / src / be / nikiroo / utils / Downloader.java
1 package be.nikiroo.utils;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.OutputStreamWriter;
6 import java.net.CookieHandler;
7 import java.net.CookieManager;
8 import java.net.CookiePolicy;
9 import java.net.CookieStore;
10 import java.net.HttpCookie;
11 import java.net.HttpURLConnection;
12 import java.net.URL;
13 import java.net.URLConnection;
14 import java.net.URLEncoder;
15 import java.util.Map;
16 import java.util.zip.GZIPInputStream;
17
/**
 * This class will help you download content from Internet Sites ({@link URL}
 * based).
 * <p>
 * It allows you to control some options often required on web sites that do
 * not want to simply serve HTML, but actively make your life difficult with
 * stupid checks.
 * <p>
 * Note that creating an instance installs its own {@link CookieManager} as the
 * default {@link CookieHandler} (see {@link CookieHandler#setDefault}).
 *
 * @author niki
 */
public class Downloader {
    /**
     * Maximum number of redirections followed by this class for one request
     * before giving up (protects against redirection loops).
     */
    private static final int MAX_REDIRECTS = 20;

    private final String UA;
    private final CookieManager cookies;

    /**
     * Create a new {@link Downloader}.
     *
     * @param UA
     *            the User-Agent to use to download the resources -- note that
     *            some websites require one, some actively blacklist real UAs
     *            like the one from wget, some whitelist a couple of browsers
     *            only (!)
     */
    public Downloader(String UA) {
        this.UA = UA;

        cookies = new CookieManager();
        cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
        CookieHandler.setDefault(cookies);
    }

    /**
     * Clear all the cookies currently in the jar.
     * <p>
     * As long as you don't, the cookies are kept.
     */
    public void clearCookies() {
        cookies.getCookieStore().removeAll();
    }

    /**
     * Open the given {@link URL} and update the cookies.
     * <p>
     * The {@link URL} itself is used as referer, and no extra cookie,
     * parameter or authorization is sent.
     *
     * @param url
     *            the {@link URL} to open
     * @return the {@link InputStream} of the opened page
     *
     * @throws IOException
     *             in case of I/O error
     **/
    public InputStream open(URL url) throws IOException {
        return open(url, url, url, null, null, null, null);
    }

    /**
     * Open the given {@link URL} and update the cookies.
     *
     * @param url
     *            the {@link URL} to open
     * @param currentReferer
     *            the {@link URL} to send as referer (can be NULL, in which
     *            case no referer is sent)
     * @param cookiesValues
     *            extra cookies (name/value) to send in addition to those
     *            already in the jar (can be NULL)
     * @param postParams
     *            the POST parameters (can be NULL)
     * @param getParams
     *            the GET parameters (priority over POST, can be NULL)
     * @param oauth
     *            OAuth authorization (aka, "bearer XXXXXXX"), can be NULL
     *
     * @return the {@link InputStream} of the opened page
     *
     * @throws IOException
     *             in case of I/O error
     */
    public InputStream open(URL url, URL currentReferer,
            Map<String, String> cookiesValues, Map<String, String> postParams,
            Map<String, String> getParams, String oauth) throws IOException {
        return open(url, url, currentReferer, cookiesValues, postParams,
                getParams, oauth);
    }

    /**
     * Trace information (info/error) generated by this class.
     * <p>
     * You can override it if you don't want the default sysout/syserr.
     *
     * @param message
     *            the message
     * @param error
     *            TRUE for error messages, FALSE for information messages
     */
    protected void trace(String message, boolean error) {
        if (error) {
            System.err.println(message);
        } else {
            System.out.println(message);
        }
    }

    /**
     * Open the given {@link URL} and update the cookies, following the
     * redirections (HTTP 3xx) that are answered by the server.
     *
     * @param url
     *            the {@link URL} to open
     * @param originalUrl
     *            the original {@link URL} before any redirection occurs (only
     *            used for error reporting)
     * @param currentReferer
     *            the {@link URL} to send as referer (can be NULL)
     * @param cookiesValues
     *            extra cookies (name/value) to send (can be NULL)
     * @param postParams
     *            the POST parameters (can be NULL)
     * @param getParams
     *            the GET parameters (priority over POST, can be NULL)
     * @param oauth
     *            OAuth authorisation (aka, "bearer XXXXXXX"), can be NULL
     * @return the {@link InputStream} of the opened page
     *
     * @throws IOException
     *             in case of I/O error, if a redirection does not give its
     *             target or if there are more than {@link #MAX_REDIRECTS}
     *             redirections
     */
    private InputStream open(URL url, final URL originalUrl,
            URL currentReferer, Map<String, String> cookiesValues,
            Map<String, String> postParams, Map<String, String> getParams,
            String oauth) throws IOException {

        URL current = url;

        // A bounded loop instead of a recursion: a redirection loop ends with
        // an IOException instead of exhausting the stack
        for (int hops = 0; hops <= MAX_REDIRECTS; hops++) {
            trace("Download: " + current, false);

            URLConnection conn = openConnectionWithCookies(current,
                    currentReferer, cookiesValues);

            sendRequestData(conn, postParams, getParams, oauth);

            conn.connect();

            if (!isRedirect(conn)) {
                InputStream in = conn.getInputStream();
                // Content codings are case-insensitive
                if ("gzip".equalsIgnoreCase(conn.getContentEncoding())) {
                    in = new GZIPInputStream(in);
                }

                return in;
            }

            String location = conn.getHeaderField("Location");
            if (location == null) {
                throw new IOException(
                        "Redirection without a Location header from: "
                                + current);
            }

            // The Location can be relative: resolve it against the URL that
            // answered the redirection (an absolute one is used as-is)
            current = new URL(current, location);
        }

        throw new IOException("Too many redirections (more than "
                + MAX_REDIRECTS + ") while downloading: " + originalUrl);
    }

    /**
     * Configure the request (method, headers) and send the parameters, if
     * any, on the given connection.
     * <p>
     * Nothing is done if the connection is not an {@link HttpURLConnection}
     * or if there is no parameter nor authorization to send.
     *
     * @param conn
     *            the (not yet connected) connection
     * @param postParams
     *            the POST parameters (can be NULL)
     * @param getParams
     *            the GET parameters (priority over POST, can be NULL)
     * @param oauth
     *            OAuth authorisation (can be NULL)
     *
     * @throws IOException
     *             in case of I/O error
     */
    private void sendRequestData(URLConnection conn,
            Map<String, String> postParams, Map<String, String> getParams,
            String oauth) throws IOException {
        // Priority: GET over POST
        Map<String, String> params = getParams;
        if (getParams == null) {
            params = postParams;
        }

        if ((params == null && oauth == null)
                || !(conn instanceof HttpURLConnection)) {
            return;
        }

        String requestData = null;
        if (params != null) {
            requestData = encodeParams(params);

            // NOTE(review): the GET parameters are written in the request
            // body exactly like the POST ones (only the explicit request
            // method differs) -- confirm that this is the intended behaviour
            conn.setDoOutput(true);

            if (getParams == null && postParams != null) {
                ((HttpURLConnection) conn).setRequestMethod("POST");
            }

            conn.setRequestProperty("Content-Type",
                    "application/x-www-form-urlencoded");
            conn.setRequestProperty("charset", "utf-8");
        }

        if (oauth != null) {
            conn.setRequestProperty("Authorization", oauth);
        }

        if (requestData != null) {
            // Explicit charset (not the platform default), and the writer is
            // closed even if the write operation fails
            OutputStreamWriter writer = new OutputStreamWriter(
                    conn.getOutputStream(), "UTF-8");
            try {
                writer.write(requestData);
                writer.flush();
            } finally {
                writer.close();
            }
        }
    }

    /**
     * Encode the given parameters as a form-urlencoded {@link String}
     * (<tt>key1=value1&amp;key2=value2</tt>).
     * <p>
     * A NULL value is encoded as the text "null".
     *
     * @param params
     *            the parameters to encode (must not be NULL)
     *
     * @return the encoded parameters (can be empty)
     *
     * @throws IOException
     *             if the UTF-8 encoding is not supported
     */
    private static String encodeParams(Map<String, String> params)
            throws IOException {
        StringBuilder requestData = new StringBuilder();
        for (Map.Entry<String, String> param : params.entrySet()) {
            if (requestData.length() != 0) {
                requestData.append('&');
            }

            requestData.append(URLEncoder.encode(param.getKey(), "UTF-8"));
            requestData.append('=');
            requestData.append(URLEncoder.encode(
                    String.valueOf(param.getValue()), "UTF-8"));
        }

        return requestData.toString();
    }

    /**
     * Check if the given connection was answered by a redirection (an HTTP
     * response code in the 3xx range).
     *
     * @param conn
     *            the connection to check
     *
     * @return TRUE if it is an HTTP connection answered by a 3xx code
     *
     * @throws IOException
     *             in case of I/O error
     */
    private static boolean isRedirect(URLConnection conn) throws IOException {
        return conn instanceof HttpURLConnection
                && ((HttpURLConnection) conn).getResponseCode() / 100 == 3;
    }

    /**
     * Open a connection on the given {@link URL}, and manage the cookies that
     * come with it.
     *
     * @param url
     *            the {@link URL} to open
     * @param currentReferer
     *            the {@link URL} to send as referer (can be NULL)
     * @param cookiesValues
     *            extra cookies (name/value) to send (can be NULL)
     *
     * @return the connection
     *
     * @throws IOException
     *             in case of I/O error
     */
    private URLConnection openConnectionWithCookies(URL url,
            URL currentReferer, Map<String, String> cookiesValues)
            throws IOException {
        URLConnection conn = url.openConnection();

        conn.setRequestProperty("User-Agent", UA);

        // Do not send an empty Cookie header when there is nothing to send
        String cookieHeader = generateCookies(cookiesValues);
        if (cookieHeader.length() > 0) {
            conn.setRequestProperty("Cookie", cookieHeader);
        }

        conn.setRequestProperty("Accept-Encoding", "gzip");

        // The Host header is deliberately not overridden: it must name the
        // server that is contacted (the connection derives it from the URL),
        // not the host of the referer
        if (currentReferer != null) {
            conn.setRequestProperty("Referer", currentReferer.toString());
        }

        return conn;
    }

    /**
     * Generate the cookie {@link String} from the local {@link CookieStore}
     * and the given extra values so it is ready to be passed.
     *
     * @param cookiesValues
     *            extra cookies (name/value) to add after those from the
     *            {@link CookieStore} (can be NULL)
     *
     * @return the cookie (can be empty, never NULL)
     */
    private String generateCookies(Map<String, String> cookiesValues) {
        StringBuilder builder = new StringBuilder();
        for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
            if (builder.length() > 0) {
                builder.append("; ");
            }

            // TODO: check if format is ok
            builder.append(cookie.toString());
        }

        if (cookiesValues != null) {
            for (Map.Entry<String, String> set : cookiesValues.entrySet()) {
                if (builder.length() > 0) {
                    builder.append("; ");
                }

                builder.append(set.getKey());
                builder.append('=');
                builder.append(set.getValue());
            }
        }

        return builder.toString();
    }
}