echo "MAIN = be/nikiroo/utils/test/Test" > Makefile
-echo "MORE = be/nikiroo/utils/MarkableFileInputStream be/nikiroo/utils/ui/UIUtils be/nikiroo/utils/ui/WrapLayout be/nikiroo/utils/ui/ProgressBar" >> Makefile
+echo "MORE = be/nikiroo/utils/MarkableFileInputStream be/nikiroo/utils/ui/UIUtils be/nikiroo/utils/ui/WrapLayout be/nikiroo/utils/ui/ProgressBar be/nikiroo/utils/Downloader be/nikiroo/utils/Cache" >> Makefile
echo "TEST = be/nikiroo/utils/test/Test" >> Makefile
echo "TEST_PARAMS = $cols $ok $ko" >> Makefile
echo "NAME = nikiroo-utils" >> Makefile
--- /dev/null
+package be.nikiroo.utils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Date;
+
+/**
+ * A generic cache system, with special support for {@link URL}s.
+ * <p>
+ * Resources are stored as plain files under a cache directory. The age of a
+ * cached file (computed from its last-modified time) is compared to one of two
+ * configurable limits -- one for resources that change often, one for
+ * "stable" resources -- to decide if it is too old to be used.
+ *
+ * @author niki
+ */
+public class Cache {
+    /** Number of milliseconds in one hour (a long, to force long arithmetic). */
+    private static final long MS_PER_HOUR = 1000L * 60 * 60;
+
+    /** The root directory of the cache. */
+    private final File dir;
+    /** Max age in milliseconds for "changing" resources (negative: never too old). */
+    private final long tooOldChanging;
+    /** Max age in milliseconds for "stable" resources (negative: never too old). */
+    private final long tooOldStable;
+
+    /**
+     * Create a new {@link Cache} object.
+     *
+     * @param dir
+     *            the directory to use as cache (it will be created if needed)
+     * @param hoursChanging
+     *            the number of hours after which a cached file that is thought
+     *            to change ~often is considered too old (or -1 for
+     *            "never too old")
+     * @param hoursStable
+     *            the number of hours after which a cached file that is thought
+     *            to change rarely is considered too old (or -1 for
+     *            "never too old")
+     *
+     * @throws IOException
+     *             if the cache directory is NULL, cannot be created or is not
+     *             a directory
+     */
+    public Cache(File dir, int hoursChanging, int hoursStable)
+            throws IOException {
+        this.dir = dir;
+        // Long arithmetic on purpose: the int product overflows as soon as the
+        // limit exceeds 596 hours, which silently turned long timeouts into
+        // negative values (i.e., "never too old")
+        this.tooOldChanging = MS_PER_HOUR * hoursChanging;
+        this.tooOldStable = MS_PER_HOUR * hoursStable;
+
+        if (dir != null && !dir.exists()) {
+            dir.mkdirs();
+        }
+
+        // A regular file with that name is not a usable cache either
+        if (dir == null || !dir.isDirectory()) {
+            throw new IOException("Cannot create the cache directory: "
+                    + (dir == null ? "null" : dir.getAbsolutePath()));
+        }
+    }
+
+    /**
+     * Check the resource to see if it is in the cache.
+     *
+     * @param url
+     *            the resource to check
+     * @param allowTooOld
+     *            allow files even if they are considered too old
+     * @param stable
+     *            a stable file (that doesn't change too often) -- parameter
+     *            used to check if the file is too old to keep or not
+     *
+     * @return TRUE if it is
+     */
+    public boolean check(URL url, boolean allowTooOld, boolean stable) {
+        File file = getCached(url);
+        return file.exists() && (allowTooOld || !isOld(file, stable));
+    }
+
+    /**
+     * Clean the cache (delete the cached items).
+     *
+     * @param onlyOld
+     *            only clean the files that are considered too old for a stable
+     *            resource
+     *
+     * @return the number of cleaned items
+     */
+    public int clean(boolean onlyOld) {
+        return clean(onlyOld, dir);
+    }
+
+    /**
+     * Trace information (info/error) generated by this class.
+     * <p>
+     * You can override it if you don't want the default sysout/syserr.
+     *
+     * @param message
+     *            the message
+     * @param error
+     *            TRUE for error messages, FALSE for information messages
+     */
+    protected void trace(String message, boolean error) {
+        if (error) {
+            System.err.println(message);
+        } else {
+            System.out.println(message);
+        }
+    }
+
+    /**
+     * Clean the cache (delete the cached items) in the given cache directory,
+     * recursively.
+     * <p>
+     * Only files are deleted, the (sub-)directories themselves are kept.
+     *
+     * @param onlyOld
+     *            only clean the files that are considered too old for stable
+     *            resources
+     * @param cacheDir
+     *            the cache directory to clean
+     *
+     * @return the number of cleaned items
+     */
+    private int clean(boolean onlyOld, File cacheDir) {
+        File[] files = cacheDir.listFiles();
+        if (files == null) {
+            // listFiles() returns NULL on I/O error or if this is not a
+            // directory (any more)
+            trace("Cannot list the cache directory: "
+                    + cacheDir.getAbsolutePath(), true);
+            return 0;
+        }
+
+        int num = 0;
+        for (File file : files) {
+            if (file.isDirectory()) {
+                num += clean(onlyOld, file);
+            } else if (!onlyOld || isOld(file, true)) {
+                if (file.delete()) {
+                    num++;
+                } else {
+                    trace("Cannot delete temporary file: "
+                            + file.getAbsolutePath(), true);
+                }
+            }
+        }
+
+        return num;
+    }
+
+    /**
+     * Open a resource from the cache if it exists.
+     *
+     * @param uniqueID
+     *            the unique ID
+     * @param allowTooOld
+     *            allow files even if they are considered too old
+     * @param stable
+     *            a stable file (that doesn't change too often) -- parameter
+     *            used to check if the file is too old to keep or not
+     *
+     * @return the opened resource if found, NULL if not
+     */
+    public InputStream load(String uniqueID, boolean allowTooOld, boolean stable) {
+        return load(getCached(uniqueID), allowTooOld, stable);
+    }
+
+    /**
+     * Open a resource from the cache if it exists.
+     *
+     * @param url
+     *            the resource to open
+     * @param allowTooOld
+     *            allow files even if they are considered too old
+     * @param stable
+     *            a stable file (that doesn't change too often) -- parameter
+     *            used to check if the file is too old to keep or not
+     *
+     * @return the opened resource if found, NULL if not
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    public InputStream load(URL url, boolean allowTooOld, boolean stable)
+            throws IOException {
+        return load(getCached(url), allowTooOld, stable);
+    }
+
+    /**
+     * Open a resource from the cache if it exists.
+     *
+     * @param cached
+     *            the cache {@link File} to open
+     * @param allowTooOld
+     *            allow files even if they are considered too old
+     * @param stable
+     *            a stable file (that doesn't change too often) -- parameter
+     *            used to check if the file is too old to keep or not
+     *
+     * @return the opened resource if found, NULL if not
+     */
+    private InputStream load(File cached, boolean allowTooOld, boolean stable) {
+        if (cached.exists() && (allowTooOld || !isOld(cached, stable))) {
+            try {
+                return new MarkableFileInputStream(new FileInputStream(cached));
+            } catch (FileNotFoundException e) {
+                // The file vanished (or is not readable) since the check:
+                // report it as "not in cache"
+                return null;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Save the given resource to the cache.
+     *
+     * @param in
+     *            the input data
+     * @param uniqueID
+     *            a unique ID used to locate the cached resource
+     *
+     * @return the resulting {@link File}
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    public File save(InputStream in, String uniqueID) throws IOException {
+        File cached = getCached(uniqueID);
+        cached.getParentFile().mkdirs();
+        return save(in, cached);
+    }
+
+    /**
+     * Save the given resource to the cache.
+     *
+     * @param in
+     *            the input data
+     * @param url
+     *            the {@link URL} used to locate the cached resource
+     *
+     * @return the resulting {@link File}
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    public File save(InputStream in, URL url) throws IOException {
+        File cached = getCached(url);
+        return save(in, cached);
+    }
+
+    /**
+     * Save the given resource to the cache.
+     *
+     * @param in
+     *            the input data
+     * @param cached
+     *            the cached {@link File} to save to
+     *
+     * @return the resulting {@link File} (the same as <tt>cached</tt>)
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    private File save(InputStream in, File cached) throws IOException {
+        IOUtils.write(in, cached);
+        return cached;
+    }
+
+    /**
+     * Check if the {@link File} is too old according to
+     * {@link Cache#tooOldChanging} or {@link Cache#tooOldStable}.
+     * <p>
+     * A file whose timestamp is in the future is reported (see
+     * {@link Cache#trace(String, boolean)}) and considered too old.
+     *
+     * @param file
+     *            the file to check
+     * @param stable
+     *            TRUE to denote stable files, that are not supposed to change
+     *            too often
+     *
+     * @return TRUE if it is
+     */
+    private boolean isOld(File file, boolean stable) {
+        long max = stable ? tooOldStable : tooOldChanging;
+
+        // Negative limit: never too old
+        if (max < 0) {
+            return false;
+        }
+
+        long time = new Date().getTime() - file.lastModified();
+        if (time < 0) {
+            trace("Timestamp in the future for file: " + file.getAbsolutePath(),
+                    true);
+        }
+
+        return time < 0 || time > max;
+    }
+
+    /**
+     * Return the associated cache {@link File} from this {@link URL}.
+     * <p>
+     * The sub-directory that will contain the cache {@link File} is created
+     * as a side effect.
+     *
+     * @param url
+     *            the {@link URL}
+     *
+     * @return the cached {@link File} version of this {@link URL} (it may not
+     *         exist yet)
+     */
+    private File getCached(URL url) {
+        File subdir;
+
+        String name = url.getHost();
+        if (name == null || name.isEmpty()) {
+            // File
+            File file = new File(url.getFile());
+            String parent = file.getParent();
+            if (parent == null) {
+                // No parent in the path (i.e., "file:name"): store the
+                // resource directly under the cache directory
+                subdir = dir;
+            } else {
+                subdir = new File(parent.replace("..", "__"));
+                subdir = new File(dir, allowedChars(subdir.getPath()));
+            }
+            name = allowedChars(url.getFile());
+        } else {
+            // URL
+            File subsubDir = new File(dir, allowedChars(url.getHost()));
+            subdir = new File(subsubDir, "_" + allowedChars(url.getPath()));
+            // Note: an URL without query is stored as "_null" (kept as-is so
+            // the already cached files stay valid)
+            name = allowedChars("_" + url.getQuery());
+        }
+
+        File cacheFile = new File(subdir, name);
+        subdir.mkdirs();
+
+        return cacheFile;
+    }
+
+    /**
+     * Get the basic cache resource file corresponding to this unique ID.
+     * <p>
+     * The file is located in a sub-directory named "_", which is NOT created
+     * by this method.
+     *
+     * @param uniqueID
+     *            the id
+     *
+     * @return the cache {@link File} for this ID (it may not exist yet)
+     */
+    private File getCached(String uniqueID) {
+        File file = new File(dir, allowedChars(uniqueID));
+        File subdir = new File(file.getParentFile(), "_");
+        return new File(subdir, file.getName());
+    }
+
+    /**
+     * Replace not allowed chars (in a {@link File}) by "_".
+     * <p>
+     * The replaced chars are '/', ':' and '\'.
+     *
+     * @param raw
+     *            the raw {@link String}
+     *
+     * @return the sanitised {@link String}
+     */
+    private String allowedChars(String raw) {
+        return raw.replace('/', '_').replace(':', '_').replace("\\", "_");
+    }
+}
\ No newline at end of file
--- /dev/null
+package be.nikiroo.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.net.CookieHandler;
+import java.net.CookieManager;
+import java.net.CookiePolicy;
+import java.net.CookieStore;
+import java.net.HttpCookie;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLEncoder;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * This class will help you download content from Internet Sites ({@link URL}
+ * based).
+ * <p>
+ * It allows you to control some options often required on web sites that do not
+ * want to simply serve HTML, but actively make your life difficult with stupid
+ * checks.
+ *
+ * @author niki
+ */
+public class Downloader {
+    /** Maximum number of redirections followed for a single request. */
+    private static final int MAX_REDIRECTS = 20;
+
+    private final String UA;
+    private final CookieManager cookies;
+
+    /**
+     * Create a new {@link Downloader}.
+     * <p>
+     * Note that the cookie jar of this {@link Downloader} is installed as the
+     * default {@link CookieHandler}, replacing any previously installed one.
+     *
+     * @param UA
+     *            the User-Agent to use to download the resources -- note that
+     *            some websites require one, some actively blacklist real UAs
+     *            like the one from wget, some whitelist a couple of browsers
+     *            only (!)
+     */
+    public Downloader(String UA) {
+        this.UA = UA;
+
+        cookies = new CookieManager();
+        cookies.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
+        CookieHandler.setDefault(cookies);
+    }
+
+    /**
+     * Clear all the cookies currently in the jar.
+     * <p>
+     * As long as you don't, the cookies are kept.
+     */
+    public void clearCookies() {
+        cookies.getCookieStore().removeAll();
+    }
+
+    /**
+     * Open the given {@link URL} and update the cookies.
+     * <p>
+     * The {@link URL} itself is used as referer.
+     *
+     * @param url
+     *            the {@link URL} to open
+     * @return the {@link InputStream} of the opened page
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    public InputStream open(URL url) throws IOException {
+        return openWithRedirects(url, url, null, null, null, null,
+                MAX_REDIRECTS);
+    }
+
+    /**
+     * Open the given {@link URL} and update the cookies.
+     *
+     * @param url
+     *            the {@link URL} to open
+     * @param currentReferer
+     *            the {@link URL} to send as referer (can be NULL)
+     * @param cookiesValues
+     *            additional cookies to send with the request, on top of those
+     *            in the jar (can be NULL)
+     * @param postParams
+     *            the POST parameters
+     * @param getParams
+     *            the GET parameters (priority over POST)
+     * @param oauth
+     *            OAuth authorization (aka, "bearer XXXXXXX")
+     *
+     * @return the {@link InputStream} of the opened page
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    public InputStream open(URL url, URL currentReferer,
+            Map<String, String> cookiesValues, Map<String, String> postParams,
+            Map<String, String> getParams, String oauth) throws IOException {
+        return openWithRedirects(url, currentReferer, cookiesValues,
+                postParams, getParams, oauth, MAX_REDIRECTS);
+    }
+
+    /**
+     * Trace information (info/error) generated by this class.
+     * <p>
+     * You can override it if you don't want the default sysout/syserr.
+     *
+     * @param message
+     *            the message
+     * @param error
+     *            TRUE for error messages, FALSE for information messages
+     */
+    protected void trace(String message, boolean error) {
+        if (error) {
+            System.err.println(message);
+        } else {
+            System.out.println(message);
+        }
+    }
+
+    /**
+     * Open the given {@link URL} and update the cookies, following the
+     * redirections (the same parameters are sent again to the new location).
+     *
+     * @param url
+     *            the {@link URL} to open
+     * @param currentReferer
+     *            the {@link URL} to send as referer (can be NULL)
+     * @param cookiesValues
+     *            additional cookies to send with the request (can be NULL)
+     * @param postParams
+     *            the POST parameters
+     * @param getParams
+     *            the GET parameters (priority over POST)
+     * @param oauth
+     *            OAuth authorisation (aka, "bearer XXXXXXX")
+     * @param redirectsLeft
+     *            the number of redirections that can still be followed
+     *
+     * @return the {@link InputStream} of the opened page
+     *
+     * @throws IOException
+     *             in case of I/O error, invalid redirection or too many
+     *             redirections
+     */
+    private InputStream openWithRedirects(URL url, URL currentReferer,
+            Map<String, String> cookiesValues, Map<String, String> postParams,
+            Map<String, String> getParams, String oauth, int redirectsLeft)
+            throws IOException {
+
+        trace("Download: " + url, false);
+
+        URLConnection conn = openConnectionWithCookies(url, currentReferer,
+                cookiesValues);
+
+        // Priority: GET over POST
+        Map<String, String> params = getParams;
+        if (getParams == null) {
+            params = postParams;
+        }
+
+        if ((params != null || oauth != null)
+                && conn instanceof HttpURLConnection) {
+            String requestData = null;
+            if (params != null) {
+                requestData = encodeParams(params);
+
+                // NOTE(review): the parameters are always written to the
+                // request body, even the "GET" ones; HttpURLConnection is
+                // expected to turn the request into a POST as soon as a
+                // body is written -- to be confirmed with the callers
+                conn.setDoOutput(true);
+
+                if (getParams == null) {
+                    ((HttpURLConnection) conn).setRequestMethod("POST");
+                }
+
+                conn.setRequestProperty("Content-Type",
+                        "application/x-www-form-urlencoded");
+                conn.setRequestProperty("charset", "utf-8");
+            }
+
+            if (oauth != null) {
+                conn.setRequestProperty("Authorization", oauth);
+            }
+
+            if (requestData != null) {
+                // Explicit charset: the body is declared as UTF-8, do not
+                // depend on the platform default
+                OutputStreamWriter writer = new OutputStreamWriter(
+                        conn.getOutputStream(), "UTF-8");
+                try {
+                    writer.write(requestData);
+                    writer.flush();
+                } finally {
+                    writer.close();
+                }
+            }
+        }
+
+        conn.connect();
+
+        // Check if redirect
+        if (conn instanceof HttpURLConnection
+                && ((HttpURLConnection) conn).getResponseCode() / 100 == 3) {
+            String location = conn.getHeaderField("Location");
+            if (location == null) {
+                throw new IOException(
+                        "Redirection without a Location header: " + url);
+            }
+
+            if (redirectsLeft <= 0) {
+                throw new IOException("Too many redirections: " + url);
+            }
+
+            // The location may be relative to the current URL
+            return openWithRedirects(new URL(url, location), currentReferer,
+                    cookiesValues, postParams, getParams, oauth,
+                    redirectsLeft - 1);
+        }
+
+        InputStream in = conn.getInputStream();
+        if ("gzip".equalsIgnoreCase(conn.getContentEncoding())) {
+            in = new GZIPInputStream(in);
+        }
+
+        return in;
+    }
+
+    /**
+     * Encode the given parameters as "application/x-www-form-urlencoded"
+     * data (UTF-8).
+     *
+     * @param params
+     *            the parameters to encode (NULL values are sent as "null")
+     *
+     * @return the encoded data
+     *
+     * @throws IOException
+     *             if the UTF-8 encoding is not supported
+     */
+    private String encodeParams(Map<String, String> params)
+            throws IOException {
+        StringBuilder requestData = new StringBuilder();
+        for (Map.Entry<String, String> param : params.entrySet()) {
+            if (requestData.length() != 0) {
+                requestData.append('&');
+            }
+            requestData.append(URLEncoder.encode(param.getKey(), "UTF-8"));
+            requestData.append('=');
+            requestData.append(URLEncoder.encode(
+                    String.valueOf(param.getValue()), "UTF-8"));
+        }
+
+        return requestData.toString();
+    }
+
+    /**
+     * Open a connection on the given {@link URL}, and manage the cookies that
+     * come with it.
+     * <p>
+     * The connection is configured (headers) but not yet connected.
+     *
+     * @param url
+     *            the {@link URL} to open
+     * @param currentReferer
+     *            the {@link URL} to send as referer (can be NULL)
+     * @param cookiesValues
+     *            additional cookies to send with the request (can be NULL)
+     *
+     * @return the connection
+     *
+     * @throws IOException
+     *             in case of I/O error
+     */
+    private URLConnection openConnectionWithCookies(URL url,
+            URL currentReferer, Map<String, String> cookiesValues)
+            throws IOException {
+        URLConnection conn = url.openConnection();
+
+        conn.setRequestProperty("User-Agent", UA);
+
+        // Do not send an empty "Cookie" header
+        String cookieHeader = generateCookies(cookiesValues);
+        if (!cookieHeader.isEmpty()) {
+            conn.setRequestProperty("Cookie", cookieHeader);
+        }
+
+        conn.setRequestProperty("Accept-Encoding", "gzip");
+
+        // No "Host" header here: it must designate the requested host (not
+        // the referer's one) and is left to the connection itself
+        if (currentReferer != null) {
+            conn.setRequestProperty("Referer", currentReferer.toString());
+        }
+
+        return conn;
+    }
+
+    /**
+     * Generate the cookie {@link String} from the local {@link CookieStore} so
+     * it is ready to be passed.
+     *
+     * @param cookiesValues
+     *            additional cookies (name/value) to add after those from the
+     *            {@link CookieStore} (can be NULL)
+     *
+     * @return the cookie {@link String} (empty if there are no cookies)
+     */
+    private String generateCookies(Map<String, String> cookiesValues) {
+        StringBuilder builder = new StringBuilder();
+        for (HttpCookie cookie : cookies.getCookieStore().getCookies()) {
+            if (builder.length() > 0) {
+                builder.append("; ");
+            }
+
+            // HttpCookie.toString() is the cookie header representation,
+            // without the leading "Cookie:" token
+            builder.append(cookie.toString());
+        }
+
+        if (cookiesValues != null) {
+            for (Map.Entry<String, String> set : cookiesValues.entrySet()) {
+                if (builder.length() > 0) {
+                    builder.append("; ");
+                }
+                builder.append(set.getKey());
+                builder.append('=');
+                builder.append(set.getValue());
+            }
+        }
+
+        return builder.toString();
+    }
+}