New FimFiction.net API downloading:
[fanfix.git] / src / be / nikiroo / fanfix / Cache.java
1 package be.nikiroo.fanfix;
2
3 import java.io.BufferedOutputStream;
4 import java.io.File;
5 import java.io.FileInputStream;
6 import java.io.FileNotFoundException;
7 import java.io.FileOutputStream;
8 import java.io.IOException;
9 import java.io.InputStream;
10 import java.io.OutputStreamWriter;
11 import java.net.CookieHandler;
12 import java.net.CookieManager;
13 import java.net.CookiePolicy;
14 import java.net.CookieStore;
15 import java.net.HttpCookie;
16 import java.net.HttpURLConnection;
17 import java.net.URL;
18 import java.net.URLConnection;
19 import java.net.URLEncoder;
20 import java.util.Date;
21 import java.util.Map;
22 import java.util.zip.GZIPInputStream;
23
24 import javax.imageio.ImageIO;
25
26 import be.nikiroo.fanfix.bundles.Config;
27 import be.nikiroo.fanfix.supported.BasicSupport;
28 import be.nikiroo.utils.IOUtils;
29 import be.nikiroo.utils.ImageUtils;
30 import be.nikiroo.utils.MarkableFileInputStream;
31
32 /**
33 * This cache will manage Internet (and local) downloads, as well as put the
34 * downloaded files into a cache.
35 * <p>
36 * As long the cached resource is not too old, it will use it instead of
37 * retrieving the file again.
38 *
39 * @author niki
40 */
41 public class Cache {
42 private File dir;
43 private String UA;
44 private long tooOldChanging;
45 private long tooOldStable;
46 private CookieManager cookies;
47
48 /**
49 * Create a new {@link Cache} object.
50 *
51 * @param dir
52 * the directory to use as cache
53 * @param UA
54 * the User-Agent to use to download the resources
55 * @param hoursChanging
56 * the number of hours after which a cached file that is thought
57 * to change ~often is considered too old (or -1 for
58 * "never too old")
59 * @param hoursStable
60 * the number of hours after which a LARGE cached file that is
61 * thought to change rarely is considered too old (or -1 for
62 * "never too old")
63 *
64 * @throws IOException
65 * in case of I/O error
66 */
67 public Cache(File dir, String UA, int hoursChanging, int hoursStable)
68 throws IOException {
69 this.dir = dir;
70 this.UA = UA;
71 this.tooOldChanging = 1000 * 60 * 60 * hoursChanging;
72 this.tooOldStable = 1000 * 60 * 60 * hoursStable;
73
74 if (dir != null) {
75 if (!dir.exists()) {
76 dir.mkdirs();
77 }
78 }
79
80 if (dir == null || !dir.exists()) {
81 throw new IOException("Cannot create the cache directory: "
82 + (dir == null ? "null" : dir.getAbsolutePath()));
83 }
84
85 cookies = new CookieManager();
86 cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
87 CookieHandler.setDefault(cookies);
88 }
89
90 /**
91 * Clear all the cookies currently in the jar.
92 */
93 public void clearCookies() {
94 cookies.getCookieStore().removeAll();
95 }
96
97 /**
98 * Open a resource (will load it from the cache if possible, or save it into
99 * the cache after downloading if not).
100 *
101 * @param url
102 * the resource to open
103 * @param support
104 * the support to use to download the resource
105 * @param stable
106 * TRUE for more stable resources, FALSE when they often change
107 *
108 * @return the opened resource, NOT NULL
109 *
110 * @throws IOException
111 * in case of I/O error
112 */
113 public InputStream open(URL url, BasicSupport support, boolean stable)
114 throws IOException {
115 // MUST NOT return null
116 return open(url, support, stable, url);
117 }
118
119 /**
120 * Open a resource (will load it from the cache if possible, or save it into
121 * the cache after downloading if not).
122 * <p>
123 * The cached resource will be assimilated to the given original {@link URL}
124 *
125 * @param url
126 * the resource to open
127 * @param support
128 * the support to use to download the resource
129 * @param stable
130 * TRUE for more stable resources, FALSE when they often change
131 * @param originalUrl
132 * the original {@link URL} used to locate the cached resource
133 *
134 * @return the opened resource, NOT NULL
135 *
136 * @throws IOException
137 * in case of I/O error
138 */
139 public InputStream open(URL url, BasicSupport support, boolean stable,
140 URL originalUrl) throws IOException {
141 // MUST NOT return null
142 try {
143 InputStream in = load(originalUrl, false, stable);
144 if (Instance.isDebug()) {
145 System.err.println("Cache " + (in != null ? "hit" : "miss")
146 + ": " + url);
147 }
148
149 if (in == null) {
150
151 try {
152 save(url, support, originalUrl);
153 } catch (IOException e) {
154 throw new IOException("Cannot save the url: "
155 + (url == null ? "null" : url.toString()), e);
156 }
157
158 // Was just saved, can load old, so, will not be null
159 in = load(originalUrl, true, stable);
160 }
161
162 return in;
163 } catch (IOException e) {
164 throw new IOException("Cannot open the url: "
165 + (url == null ? "null" : url.toString()), e);
166 }
167 }
168
169 /**
170 * Open the given {@link URL} without using the cache, but still using and
171 * updating the cookies.
172 *
173 * @param url
174 * the {@link URL} to open
175 * @param support
176 * the {@link BasicSupport} used for the cookies
177 *
178 * @return the {@link InputStream} of the opened page
179 *
180 * @throws IOException
181 * in case of I/O error
182 */
183 public InputStream openNoCache(URL url, BasicSupport support)
184 throws IOException {
185 return openNoCache(url, support, url, null, null, null);
186 }
187
188 /**
189 * Open the given {@link URL} without using the cache, but still using and
190 * updating the cookies.
191 *
192 * @param url
193 * the {@link URL} to open
194 * @param support
195 * the {@link BasicSupport} used for the cookies
196 * @param postParams
197 * the POST parameters
198 * @param getParams
199 * the GET parameters (priority over POST)
200 * @param oauth
201 * OAuth authorization (aka, "bearer XXXXXXX")
202 *
203 * @return the {@link InputStream} of the opened page
204 *
205 * @throws IOException
206 * in case of I/O error
207 */
208 public InputStream openNoCache(URL url, BasicSupport support,
209 Map<String, String> postParams, Map<String, String> getParams,
210 String oauth) throws IOException {
211 return openNoCache(url, support, url, postParams, getParams, oauth);
212 }
213
214 /**
215 * Open the given {@link URL} without using the cache, but still using and
216 * updating the cookies.
217 *
218 * @param url
219 * the {@link URL} to open
220 * @param support
221 * the {@link BasicSupport} used for the cookies
222 * @param originalUrl
223 * the original {@link URL} before any redirection occurs
224 * @param postParams
225 * the POST parameters
226 * @param getParams
227 * the GET parameters (priority over POST)
228 * @param oauth
229 * OAuth authorization (aka, "bearer XXXXXXX")
230 * @return the {@link InputStream} of the opened page
231 *
232 * @throws IOException
233 * in case of I/O error
234 */
235 private InputStream openNoCache(URL url, BasicSupport support,
236 final URL originalUrl, Map<String, String> postParams,
237 Map<String, String> getParams, String oauth) throws IOException {
238
239 if (Instance.isDebug()) {
240 System.err.println("Open no cache: " + url);
241 }
242
243 URLConnection conn = openConnectionWithCookies(url, support);
244 if (support != null) {
245 // priority: arguments
246 if (oauth == null) {
247 oauth = support.getOAuth();
248 }
249 }
250
251 // Priority: GET over POST
252 Map<String, String> params = getParams;
253 if (getParams == null) {
254 params = postParams;
255 }
256
257 if ((params != null || oauth != null)
258 && conn instanceof HttpURLConnection) {
259 StringBuilder requestData = null;
260 if (params != null) {
261 requestData = new StringBuilder();
262 for (Map.Entry<String, String> param : params.entrySet()) {
263 if (requestData.length() != 0)
264 requestData.append('&');
265 requestData.append(URLEncoder.encode(param.getKey(),
266 "UTF-8"));
267 requestData.append('=');
268 requestData.append(URLEncoder.encode(
269 String.valueOf(param.getValue()), "UTF-8"));
270 }
271
272 conn.setDoOutput(true);
273
274 if (getParams == null && postParams != null) {
275 ((HttpURLConnection) conn).setRequestMethod("POST");
276 }
277
278 conn.setRequestProperty("Content-Type",
279 "application/x-www-form-urlencoded");
280 conn.setRequestProperty("charset", "utf-8");
281 }
282
283 if (oauth != null) {
284 conn.setRequestProperty("Authorization", oauth);
285 }
286
287 if (requestData != null) {
288 OutputStreamWriter writer = new OutputStreamWriter(
289 conn.getOutputStream());
290
291 writer.write(requestData.toString());
292 writer.flush();
293 writer.close();
294 }
295 }
296
297 conn.connect();
298
299 // Check if redirect
300 if (conn instanceof HttpURLConnection
301 && ((HttpURLConnection) conn).getResponseCode() / 100 == 3) {
302 String newUrl = conn.getHeaderField("Location");
303 return openNoCache(new URL(newUrl), support, originalUrl,
304 postParams, getParams, oauth);
305 }
306
307 InputStream in = conn.getInputStream();
308 if ("gzip".equals(conn.getContentEncoding())) {
309 in = new GZIPInputStream(in);
310 }
311
312 return in;
313 }
314
315 /**
316 * Refresh the resource into cache if needed.
317 *
318 * @param url
319 * the resource to open
320 * @param support
321 * the support to use to download the resource
322 * @param stable
323 * TRUE for more stable resources, FALSE when they often change
324 *
325 * @throws IOException
326 * in case of I/O error
327 */
328 public void refresh(URL url, BasicSupport support, boolean stable)
329 throws IOException {
330 File cached = getCached(url);
331 if (cached.exists() && !isOld(cached, stable)) {
332 return;
333 }
334
335 open(url, support, stable).close();
336 }
337
338 /**
339 * Check the resource to see if it is in the cache.
340 *
341 * @param url
342 * the resource to check
343 *
344 * @return TRUE if it is
345 *
346 */
347 public boolean check(URL url) {
348 return getCached(url).exists();
349 }
350
351 /**
352 * Save the given resource as an image on disk using the default image
353 * format for content.
354 *
355 * @param url
356 * the resource
357 * @param target
358 * the target file
359 *
360 * @throws IOException
361 * in case of I/O error
362 */
363 public void saveAsImage(URL url, File target) throws IOException {
364 URL cachedUrl = new URL(url.toString());
365 File cached = getCached(cachedUrl);
366
367 if (!cached.exists() || isOld(cached, true)) {
368 InputStream imageIn = open(url, null, true);
369 ImageIO.write(ImageUtils.fromStream(imageIn), Instance.getConfig()
370 .getString(Config.IMAGE_FORMAT_CONTENT).toLowerCase(),
371 cached);
372 }
373
374 IOUtils.write(new FileInputStream(cached), target);
375 }
376
377 /**
378 * Manually add this item to the cache.
379 *
380 * @param in
381 * the input data
382 * @param uniqueID
383 * a unique ID for this resource
384 *
385 * @return the resulting {@link File}
386 *
387 * @throws IOException
388 * in case of I/O error
389 */
390 public File addToCache(InputStream in, String uniqueID) throws IOException {
391 File file = getCached(uniqueID);
392 File subdir = new File(file.getParentFile(), "_");
393 file = new File(subdir, file.getName());
394 subdir.mkdir();
395 IOUtils.write(in, file);
396 return file;
397 }
398
399 /**
400 * Return the {@link InputStream} corresponding to the given unique ID, or
401 * NULL if none found.
402 *
403 * @param uniqueID
404 * the unique ID
405 *
406 * @return the content or NULL
407 */
408 public InputStream getFromCache(String uniqueID) {
409 File file = getCached(uniqueID);
410 File subdir = new File(file.getParentFile(), "_");
411 file = new File(subdir, file.getName());
412 if (file.exists()) {
413 try {
414 return new MarkableFileInputStream(new FileInputStream(file));
415 } catch (FileNotFoundException e) {
416 }
417 }
418
419 return null;
420 }
421
422 /**
423 * Clean the cache (delete the cached items).
424 *
425 * @param onlyOld
426 * only clean the files that are considered too old
427 *
428 * @return the number of cleaned items
429 */
430 public int cleanCache(boolean onlyOld) {
431 return cleanCache(onlyOld, dir);
432 }
433
434 /**
435 * Clean the cache (delete the cached items) in the given cache directory.
436 *
437 * @param onlyOld
438 * only clean the files that are considered too old
439 * @param cacheDir
440 * the cache directory to clean
441 *
442 * @return the number of cleaned items
443 */
444 private int cleanCache(boolean onlyOld, File cacheDir) {
445 int num = 0;
446 for (File file : cacheDir.listFiles()) {
447 if (file.isDirectory()) {
448 num += cleanCache(onlyOld, file);
449 } else {
450 if (!onlyOld || isOld(file, true)) {
451 if (file.delete()) {
452 num++;
453 } else {
454 System.err.println("Cannot delete temporary file: "
455 + file.getAbsolutePath());
456 }
457 }
458 }
459 }
460
461 return num;
462 }
463
464 /**
465 * Open a resource from the cache if it exists.
466 *
467 * @param url
468 * the resource to open
469 * @param allowTooOld
470 * allow files even if they are considered too old
471 * @param stable
472 * a stable file (that dones't change too often) -- parameter
473 * used to check if the file is too old to keep or not
474 *
475 * @return the opened resource if found, NULL i not
476 *
477 * @throws IOException
478 * in case of I/O error
479 */
480 private InputStream load(URL url, boolean allowTooOld, boolean stable)
481 throws IOException {
482 File cached = getCached(url);
483 if (cached.exists() && (allowTooOld || !isOld(cached, stable))) {
484 return new MarkableFileInputStream(new FileInputStream(cached));
485 }
486
487 return null;
488 }
489
490 /**
491 * Save the given resource to the cache.
492 *
493 * @param url
494 * the resource
495 * @param support
496 * the {@link BasicSupport} used to download it
497 * @param originalUrl
498 * the original {@link URL} used to locate the cached resource
499 *
500 * @throws IOException
501 * in case of I/O error
502 */
503 private void save(URL url, BasicSupport support, URL originalUrl)
504 throws IOException {
505 InputStream in = openNoCache(url, support, originalUrl, null, null,
506 null);
507 try {
508 File cached = getCached(originalUrl);
509 BufferedOutputStream out = new BufferedOutputStream(
510 new FileOutputStream(cached));
511 try {
512 byte[] buf = new byte[4096];
513 int len;
514 while ((len = in.read(buf)) > 0) {
515 out.write(buf, 0, len);
516 }
517 } finally {
518 out.close();
519 }
520 } finally {
521 in.close();
522 }
523 }
524
525 /**
526 * Open a connection on the given {@link URL}, and manage the cookies that
527 * come with it.
528 *
529 * @param url
530 * the {@link URL} to open
531 * @param support
532 * the {@link BasicSupport} to use for cookie generation
533 *
534 * @return the connection
535 *
536 * @throws IOException
537 * in case of I/O error
538 */
539 private URLConnection openConnectionWithCookies(URL url,
540 BasicSupport support) throws IOException {
541 URLConnection conn = url.openConnection();
542
543 conn.setRequestProperty("User-Agent", UA);
544 conn.setRequestProperty("Cookie", generateCookies(support));
545 conn.setRequestProperty("Accept-Encoding", "gzip");
546 if (support != null && support.getCurrentReferer() != null) {
547 conn.setRequestProperty("Referer", support.getCurrentReferer()
548 .toString());
549 conn.setRequestProperty("Host", support.getCurrentReferer()
550 .getHost());
551 }
552
553 return conn;
554 }
555
556 /**
557 * Check if the {@link File} is too old according to
558 * {@link Cache#tooOldChanging}.
559 *
560 * @param file
561 * the file to check
562 * @param stable
563 * TRUE to denote files that are not supposed to change too often
564 *
565 * @return TRUE if it is
566 */
567 private boolean isOld(File file, boolean stable) {
568 long max = tooOldChanging;
569 if (stable) {
570 max = tooOldStable;
571 }
572
573 if (max < 0) {
574 return false;
575 }
576
577 long time = new Date().getTime() - file.lastModified();
578 if (time < 0) {
579 System.err.println("Timestamp in the future for file: "
580 + file.getAbsolutePath());
581 }
582
583 return time < 0 || time > max;
584 }
585
586 /**
587 * Return the associated cache {@link File} from this {@link URL}.
588 *
589 * @param url
590 * the url
591 *
592 * @return the cached {@link File} version of this {@link URL}
593 */
594 private File getCached(URL url) {
595 File subdir = null;
596
597 String name = url.getHost();
598 if (name == null || name.isEmpty()) {
599 name = url.getFile();
600 } else {
601 File cacheDir = getCached(".").getParentFile();
602 File subsubDir = new File(cacheDir, allowedChars(url.getHost()));
603 subdir = new File(subsubDir, "_" + allowedChars(url.getPath()));
604 name = allowedChars("_" + url.getQuery());
605 }
606
607 File cacheFile = getCached(name);
608 if (subdir != null) {
609 cacheFile = new File(subdir, cacheFile.getName());
610 subdir.mkdirs();
611 }
612
613 return cacheFile;
614 }
615
616 /**
617 * Get the basic cache resource file corresponding to this unique ID.
618 * <p>
619 * Note that you may need to add a sub-directory in some cases.
620 *
621 * @param uniqueID
622 * the id
623 *
624 * @return the cached version if present, NULL if not
625 */
626 private File getCached(String uniqueID) {
627 return new File(dir, allowedChars(uniqueID));
628 }
629
630 /**
631 * Replace not allowed chars (in a {@link File}) by "_".
632 *
633 * @param raw
634 * the raw {@link String}
635 *
636 * @return the sanitised {@link String}
637 */
638 private String allowedChars(String raw) {
639 return raw.replace('/', '_').replace(':', '_').replace("\\", "_");
640 }
641
642 /**
643 * Generate the cookie {@link String} from the local {@link CookieStore} so
644 * it is ready to be passed.
645 *
646 * @return the cookie
647 */
648 private String generateCookies(BasicSupport support) {
649 StringBuilder builder = new StringBuilder();
650 for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
651 if (builder.length() > 0) {
652 builder.append(';');
653 }
654
655 // TODO: check if format is ok
656 builder.append(cookie.toString());
657 }
658
659 if (support != null) {
660 for (Map.Entry<String, String> set : support.getCookies()
661 .entrySet()) {
662 if (builder.length() > 0) {
663 builder.append(';');
664 }
665 builder.append(set.getKey());
666 builder.append('=');
667 builder.append(set.getValue());
668 }
669 }
670
671 return builder.toString();
672 }
673 }