Commit | Line | Data |
---|---|---|
8816d2f7 NR |
1 | package be.nikiroo.utils; |
2 | ||
3 | import java.io.IOException; | |
4 | import java.io.InputStream; | |
5 | import java.io.OutputStreamWriter; | |
6 | import java.net.CookieHandler; | |
7 | import java.net.CookieManager; | |
8 | import java.net.CookiePolicy; | |
9 | import java.net.CookieStore; | |
10 | import java.net.HttpCookie; | |
11 | import java.net.HttpURLConnection; | |
12 | import java.net.URL; | |
13 | import java.net.URLConnection; | |
14 | import java.net.URLEncoder; | |
15 | import java.util.Map; | |
16 | import java.util.zip.GZIPInputStream; | |
17 | ||
/**
 * This class will help you download content from Internet Sites ({@link URL}
 * based).
 * <p>
 * It allows you to control some options often required on web sites that do not
 * want to simply serve HTML, but actively make your life difficult with stupid
 * checks.
 * <p>
 * Creating an instance installs its cookie jar as the default
 * {@link CookieHandler} (see {@link CookieHandler#setDefault(CookieHandler)}).
 * 
 * @author niki
 */
public class Downloader {
	/** Maximum number of redirections followed by hand for a single request. */
	private static final int MAX_REDIRECTS = 20;

	/** Separator between two cookie pairs in a <tt>Cookie</tt> header. */
	private static final String COOKIE_SEPARATOR = "; ";

	private final String UA;
	private final CookieManager cookies;

	/**
	 * Create a new {@link Downloader}.
	 * <p>
	 * The cookie jar of this instance accepts all cookies and is registered as
	 * the default {@link CookieHandler}.
	 * 
	 * @param UA
	 *            the User-Agent to use to download the resources -- note that
	 *            some websites require one, some actively blacklist real UAs
	 *            like the one from wget, some whitelist a couple of browsers
	 *            only (!)
	 */
	public Downloader(String UA) {
		this.UA = UA;

		cookies = new CookieManager();
		cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
		CookieHandler.setDefault(cookies);
	}

	/**
	 * Clear all the cookies currently in the jar.
	 * <p>
	 * As long as you don't, the cookies are kept.
	 */
	public void clearCookies() {
		cookies.getCookieStore().removeAll();
	}

	/**
	 * Open the given {@link URL} and update the cookies.
	 * <p>
	 * The {@link URL} itself is used as referer, and no extra cookies,
	 * parameters or authorization are sent.
	 * 
	 * @param url
	 *            the {@link URL} to open
	 * @return the {@link InputStream} of the opened page
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 **/
	public InputStream open(URL url) throws IOException {
		return open(url, url, url, null, null, null, null, 0);
	}

	/**
	 * Open the given {@link URL} and update the cookies.
	 * 
	 * @param url
	 *            the {@link URL} to open
	 * @param currentReferer
	 *            the {@link URL} to send as referer (can be NULL to send
	 *            none)
	 * @param cookiesValues
	 *            extra cookies (name/value) to send in addition to those of
	 *            the cookie jar (can be NULL)
	 * @param postParams
	 *            the POST parameters (can be NULL)
	 * @param getParams
	 *            the GET parameters (priority over POST, can be NULL)
	 * @param oauth
	 *            OAuth authorization (aka, "bearer XXXXXXX"), can be NULL
	 * 
	 * @return the {@link InputStream} of the opened page
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	public InputStream open(URL url, URL currentReferer,
			Map<String, String> cookiesValues, Map<String, String> postParams,
			Map<String, String> getParams, String oauth) throws IOException {
		return open(url, url, currentReferer, cookiesValues, postParams,
				getParams, oauth, 0);
	}

	/**
	 * Trace information (info/error) generated by this class.
	 * <p>
	 * You can override it if you don't want the default sysout/syserr.
	 * 
	 * @param message
	 *            the message
	 * @param error
	 *            TRUE for error messages, FALSE for information messages
	 */
	protected void trace(String message, boolean error) {
		if (error) {
			System.err.println(message);
		} else {
			System.out.println(message);
		}
	}

	/**
	 * Open the given {@link URL} and update the cookies.
	 * <p>
	 * HTTP redirections (3xx answers) that reach this method are followed by
	 * hand, up to {@link #MAX_REDIRECTS} times; the same parameters are sent
	 * again to the new location.
	 * 
	 * @param url
	 *            the {@link URL} to open
	 * @param originalUrl
	 *            the original {@link URL} before any redirection occurs
	 * @param currentReferer
	 *            the {@link URL} to send as referer (can be NULL to send
	 *            none)
	 * @param cookiesValues
	 *            extra cookies (name/value) to send in addition to those of
	 *            the cookie jar (can be NULL)
	 * @param postParams
	 *            the POST parameters (can be NULL)
	 * @param getParams
	 *            the GET parameters (priority over POST, can be NULL)
	 * @param oauth
	 *            OAuth authorisation (aka, "bearer XXXXXXX"), can be NULL
	 * @param redirects
	 *            the number of redirections already followed for this request
	 * 
	 * @return the {@link InputStream} of the opened page
	 * 
	 * @throws IOException
	 *             in case of I/O error, of redirection without target or of
	 *             too many redirections
	 */
	private InputStream open(URL url, final URL originalUrl,
			URL currentReferer, Map<String, String> cookiesValues,
			Map<String, String> postParams, Map<String, String> getParams,
			String oauth, int redirects) throws IOException {

		trace("Download: " + url, false);

		URLConnection conn = openConnectionWithCookies(url, currentReferer,
				cookiesValues);

		// Priority: GET over POST
		Map<String, String> params = getParams;
		if (getParams == null) {
			params = postParams;
		}

		if ((params != null || oauth != null)
				&& conn instanceof HttpURLConnection) {
			String requestData = null;
			if (params != null) {
				requestData = encodeParams(params);

				// NOTE(review): GET parameters are written to the request
				// body exactly like POST ones (they are not appended to the
				// URL); HttpURLConnection presumably switches such a request
				// to POST -- confirm this is intended
				conn.setDoOutput(true);

				if (getParams == null) {
					((HttpURLConnection) conn).setRequestMethod("POST");
				}

				conn.setRequestProperty("Content-Type",
						"application/x-www-form-urlencoded");
				conn.setRequestProperty("charset", "utf-8");
			}

			if (oauth != null) {
				conn.setRequestProperty("Authorization", oauth);
			}

			if (requestData != null) {
				// Explicit charset (same as the one used to encode the
				// parameters) instead of the platform default
				OutputStreamWriter writer = new OutputStreamWriter(
						conn.getOutputStream(), "UTF-8");
				try {
					writer.write(requestData);
					writer.flush();
				} finally {
					writer.close();
				}
			}
		}

		conn.connect();

		// Check if redirect
		if (conn instanceof HttpURLConnection
				&& ((HttpURLConnection) conn).getResponseCode() / 100 == 3) {
			String newUrl = conn.getHeaderField("Location");
			if (newUrl == null) {
				throw new IOException(
						"Redirection without a Location header: " + url);
			}

			if (redirects >= MAX_REDIRECTS) {
				throw new IOException("Too many redirections ("
						+ MAX_REDIRECTS + ") for: " + originalUrl);
			}

			// The location can be relative: resolve it against the current
			// URL (an absolute location is used as is)
			return open(new URL(url, newUrl), originalUrl, currentReferer,
					cookiesValues, postParams, getParams, oauth,
					redirects + 1);
		}

		InputStream in = conn.getInputStream();
		if ("gzip".equalsIgnoreCase(conn.getContentEncoding())) {
			in = new GZIPInputStream(in);
		}

		return in;
	}

	/**
	 * Encode the given parameters as
	 * <tt>application/x-www-form-urlencoded</tt> data (UTF-8).
	 * <p>
	 * A NULL value is sent as the text "null".
	 * 
	 * @param params
	 *            the parameters to encode (must not be NULL)
	 * 
	 * @return the encoded parameters, separated by '&amp;'
	 * 
	 * @throws IOException
	 *             if the UTF-8 encoding is not supported
	 */
	private String encodeParams(Map<String, String> params)
			throws IOException {
		StringBuilder requestData = new StringBuilder();
		for (Map.Entry<String, String> param : params.entrySet()) {
			if (requestData.length() != 0) {
				requestData.append('&');
			}

			requestData.append(URLEncoder.encode(param.getKey(), "UTF-8"));
			requestData.append('=');
			requestData.append(URLEncoder.encode(
					String.valueOf(param.getValue()), "UTF-8"));
		}

		return requestData.toString();
	}

	/**
	 * Open a connection on the given {@link URL}, and manage the cookies that
	 * come with it.
	 * <p>
	 * The connection is configured (User-Agent, cookies, gzip encoding,
	 * referer) but not connected yet.
	 * 
	 * @param url
	 *            the {@link URL} to open
	 * @param currentReferer
	 *            the {@link URL} to send as referer (can be NULL to send
	 *            none)
	 * @param cookiesValues
	 *            extra cookies (name/value) to send in addition to those of
	 *            the cookie jar (can be NULL)
	 * 
	 * @return the connection
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	private URLConnection openConnectionWithCookies(URL url,
			URL currentReferer, Map<String, String> cookiesValues)
			throws IOException {
		URLConnection conn = url.openConnection();

		conn.setRequestProperty("User-Agent", UA);
		conn.setRequestProperty("Cookie", generateCookies(cookiesValues));
		conn.setRequestProperty("Accept-Encoding", "gzip");
		if (currentReferer != null) {
			conn.setRequestProperty("Referer", currentReferer.toString());
			// NOTE(review): the Host is taken from the referer, not from the
			// URL that is opened -- confirm this is intended
			conn.setRequestProperty("Host", currentReferer.getHost());
		}

		return conn;
	}

	/**
	 * Generate the cookie {@link String} from the local {@link CookieStore} so
	 * it is ready to be passed.
	 * <p>
	 * The cookies of the jar come first, then the given extra ones; all of
	 * them are separated by "; ".
	 * 
	 * @param cookiesValues
	 *            extra cookies (name/value) to add after those of the cookie
	 *            jar (can be NULL)
	 * 
	 * @return the cookie (an empty {@link String} if there is no cookie)
	 */
	private String generateCookies(Map<String, String> cookiesValues) {
		StringBuilder builder = new StringBuilder();
		for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
			if (builder.length() > 0) {
				builder.append(COOKIE_SEPARATOR);
			}

			// TODO: check if format is ok
			builder.append(cookie.toString());
		}

		if (cookiesValues != null) {
			for (Map.Entry<String, String> set : cookiesValues.entrySet()) {
				if (builder.length() > 0) {
					builder.append(COOKIE_SEPARATOR);
				}

				builder.append(set.getKey());
				builder.append('=');
				builder.append(set.getValue());
			}
		}

		return builder.toString();
	}
}