import be.nikiroo.utils.Cache;
import be.nikiroo.utils.CacheMemory;
import be.nikiroo.utils.Downloader;
-import be.nikiroo.utils.IOUtils;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.ImageUtils;
import be.nikiroo.utils.TraceHandler;
*/
public class DataLoader {
private Downloader downloader; // set by both constructors; the File-based one hands it a Cache
- private Cache downloadCache;
+ private Downloader downloaderNoCache; // built without a Cache argument, or aliased to downloader by the UA-only constructor
private Cache cache; // downloader.getCache() or a new CacheMemory, depending on the constructor
/**
*/
public DataLoader(File dir, String UA, int hoursChanging, int hoursStable)
throws IOException { // variant whose main downloader is given a Cache built from dir
- downloader = new Downloader(UA);
- downloadCache = new Cache(dir, hoursChanging, hoursStable);
- cache = downloadCache;
+ downloader = new Downloader(UA, new Cache(dir, hoursChanging,
+ hoursStable)); // the Cache is now handed to the downloader instead of being kept in a separate field
+ downloaderNoCache = new Downloader(UA); // second downloader, built without a Cache argument
+
+ cache = downloader.getCache(); // presumably the Cache passed just above -- confirm in Downloader
}
/**
*/
public DataLoader(String UA) { // variant configured with a User-Agent only (no cache directory)
downloader = new Downloader(UA);
- downloadCache = null;
+ downloaderNoCache = downloader; // no Cache was given, so both fields share the same Downloader
cache = new CacheMemory(); // the loader's own cache field is a CacheMemory here
}
*/
public void setTraceHandler(TraceHandler tracer) { // forwards the tracer to both downloaders and to the caches
downloader.setTraceHandler(tracer);
+ downloaderNoCache.setTraceHandler(tracer);
cache.setTraceHandler(tracer);
- if (downloadCache != null) {
- downloadCache.setTraceHandler(tracer);
+ if (downloader.getCache() != null) { // NOTE(review): presumably null when the downloader was built from the UA alone -- confirm
+ downloader.getCache().setTraceHandler(tracer);
}
}
/**
* Open a resource (will load it from the cache if possible, or save it into
* the cache after downloading if not).
+ * <p>
+ * The cached resource will be associated with the given {@link URL}.
*
* @param url
* the resource to open
*/
public InputStream open(URL url, BasicSupport support, boolean stable)
throws IOException {
- // MUST NOT return null
- return open(url, support, stable, url);
+ return open(url, url, support, stable, null, null, null); // url doubles as originalUrl; no POST/GET parameters, no explicit OAuth
}
/**
*
* @param url
* the resource to open
+ * @param originalUrl
+ * the original {@link URL} before any redirection occurs, which
+ * is also used for the cache ID if needed (so we can retrieve
+ * the content with this URL if needed)
* @param support
* the support to use to download the resource
* @param stable
* TRUE for more stable resources, FALSE when they often change
- * @param originalUrl
- * the original {@link URL} used to locate the cached resource
*
* @return the opened resource, NOT NULL
*
* @throws IOException
* in case of I/O error
*/
- public InputStream open(URL url, BasicSupport support, boolean stable,
- URL originalUrl) throws IOException {
- // MUST NOT return null
- try {
- InputStream in = null;
-
- if (downloadCache != null) {
- in = downloadCache.load(originalUrl, false, stable);
- Instance.getTraceHandler().trace(
- "Cache " + (in != null ? "hit" : "miss") + ": " + url);
- }
-
- if (in == null) {
- try {
- in = openNoCache(url, support, null, null, null);
- if (downloadCache != null) {
- downloadCache.save(in, originalUrl);
- // ..But we want a resetable stream
- in.close();
- in = downloadCache.load(originalUrl, true, stable);
- } else {
- InputStream resetIn = IOUtils.forceResetableStream(in);
- if (resetIn != in) {
- in.close();
- in = resetIn;
- }
- }
- } catch (IOException e) {
- throw new IOException("Cannot save the url: "
- + (url == null ? "null" : url.toString()), e);
- }
- }
-
- return in;
- } catch (IOException e) {
- throw new IOException("Cannot open the url: "
- + (url == null ? "null" : url.toString()), e);
- }
+ public InputStream open(URL url, URL originalUrl, BasicSupport support, // note: originalUrl is now the 2nd parameter (it was the last one)
+ boolean stable) throws IOException {
+ return open(url, originalUrl, support, stable, null, null, null); // delegates with no POST/GET parameters and no explicit OAuth
}
/**
- * Open the given {@link URL} without using the cache, but still update the
- * cookies.
+ * Open a resource (will load it from the cache if possible, or save it into
+ * the cache after downloading if not).
+ * <p>
+ * The cached resource will be assimilated to the given original {@link URL}
*
* @param url
- * the {@link URL} to open
+ * the resource to open
+ * @param originalUrl
+ * the original {@link URL} before any redirection occurs, which
+ * is also used for the cache ID if needed (so we can retrieve
+ * the content with this URL if needed)
+ * @param support
+ * the support to use to download the resource
+ * @param stable
+ * TRUE for more stable resources, FALSE when they often change
+ * @param postParams
+ * the POST parameters
+ * @param getParams
+ * the GET parameters (priority over POST)
+ * @param oauth
+ * OAuth authorization (aka, "bearer XXXXXXX")
*
- * @return the {@link InputStream} of the opened page
+ * @return the opened resource, NOT NULL
*
* @throws IOException
* in case of I/O error
*/
- public InputStream openNoCache(URL url) throws IOException {
- return downloader.open(url);
+ public InputStream open(URL url, URL originalUrl, BasicSupport support,
+ boolean stable, Map<String, String> postParams,
+ Map<String, String> getParams, String oauth) throws IOException {
+
+ Map<String, String> cookiesValues = null; // stays null when no support is given
+ URL currentReferer = url; // default referer: the requested URL itself
+
+ if (support != null) {
+ cookiesValues = support.getCookies();
+ currentReferer = support.getCurrentReferer(); // NOTE(review): replaces the default even if the support returns null -- confirm that is intended
+ // priority: an explicit oauth argument wins over the support's value
+ if (oauth == null) {
+ oauth = support.getOAuth();
+ }
+ }
+
+ return downloader.open(url, originalUrl, currentReferer, cookiesValues, // goes through the main downloader, not downloaderNoCache
+ postParams, getParams, oauth, stable);
}
/**
}
}
- return downloader.open(url, currentReferer, cookiesValues, postParams,
- getParams, oauth);
+ return downloaderNoCache.open(url, currentReferer, cookiesValues,
+ postParams, getParams, oauth);
}
/**
*/
public void refresh(URL url, BasicSupport support, boolean stable)
throws IOException {
- if (downloadCache != null && !downloadCache.check(url, false, stable)) {
- open(url, support, stable).close();
+ // Download only when a cache exists and it does NOT already hold the
+ // URL: check() returns true on a cache hit, so it must be negated here
+ // (same condition as the removed downloadCache-based test).
+ if (downloader.getCache() != null && !check(url, stable)) {
+ // Opening then closing the stream is enough to get the resource cached
+ open(url, url, support, stable, null, null, null).close();
}
}
*
*/
public boolean check(URL url, boolean stable) {
- return downloadCache != null && downloadCache.check(url, false, stable);
+ return downloader.getCache() != null // false whenever the downloader has no cache
+ && downloader.getCache().check(url, false, stable); // otherwise the answer comes from Cache.check
}
/**
for (Element result : doc.getElementsByClass("rechercheAffichage")) {
Element a = result.getElementsByTag("a").first();
if (a != null) {
+ int projectId = -1;
+
MetaData meta = new MetaData();
- meta.setUrl(a.absUrl("href"));
- Element img = result.getElementsByTag("img").first();
- if (img != null) {
- String coverUrl = img.absUrl("src");
- try {
- InputStream in = Instance.getCache().open(
- new URL(coverUrl), getSupport(), true);
- try {
- meta.setCover(new Image(in));
- } finally {
- in.close();
- }
- } catch (Exception e) {
- Instance.getTraceHandler()
- .error(new Exception(
- "Cannot download cover for MangaLEL story in search mode",
- e));
- }
- }
+ // Target:
+ // http://mangas-lecture-en-ligne.fr/index_lel.php?page=presentationProjet&idProjet=218
+
+ // a.absUrl("href"):
+ // http://mangas-lecture-en-ligne.fr/index_lel?onCommence=oui&idChapitre=2805
+
+ // ...but we need the PROJECT id, not the CHAPTER id -> use
+ // <IMG>
Elements infos = result.getElementsByClass("texte");
if (infos != null) {
getVal(tab, 5)));
}
- metas.add(meta);
+ Element img = result.getElementsByTag("img").first();
+ if (img != null) {
+ try {
+ String[] tab = img.attr("src").split("/");
+ String str = tab[tab.length - 1];
+ tab = str.split("\\.");
+ str = tab[0];
+ projectId = Integer.parseInt(str);
+
+ String coverUrl = img.absUrl("src");
+ try {
+ InputStream in = Instance.getCache().open(
+ new URL(coverUrl), getSupport(), true);
+ try {
+ meta.setCover(new Image(in));
+ } finally {
+ in.close();
+ }
+ } catch (Exception e) {
+ // Happen often on MangaLEL...
+ Instance.getTraceHandler().trace(
+ "Cannot download cover for MangaLEL story in search mode: "
+ + meta.getTitle());
+ }
+ } catch (Exception e) {
+ // no project id... cannot use the story :(
+ Instance.getTraceHandler().error(
+ "Cannot find ProjectId for MangaLEL story in search mode: "
+ + meta.getTitle());
+ }
+ }
+
+ if (projectId >= 0) {
+ meta.setUrl("http://mangas-lecture-en-ligne.fr/index_lel.php?page=presentationProjet&idProjet="
+ + projectId);
+ metas.add(meta);
+ }
}
}
private String getAuthor() {
Element doc = getSourceNode();
- Elements tabEls = doc.getElementsByClass("projet-titre");
-
- String value = "";
- if (tabEls.size() >= 2) {
- value = StringUtils.unhtml(tabEls.get(1).text()).trim();
+ Element tabEls = doc.getElementsByClass("presentation-projet").first(); // first element carrying that class (may be null, see the check below)
+ if (tabEls != null) {
+ String[] tab = tabEls.outerHtml().split("<br>"); // one entry per <br>-separated chunk of the element's HTML
+ return getVal(tab, 1); // entry 1 is read as the author
}
- return value;
+ return ""; // no such element: empty author
}
private List<String> getTags() {
- List<String> tags = new ArrayList<String>();
-
Element doc = getSourceNode();
- Elements tabEls = doc.getElementsByClass("projet-titre");
-
- if (tabEls.size() >= 4) {
- String values = StringUtils.unhtml(tabEls.get(3).text()).trim();
- for (String value : values.split(",")) {
- tags.add(value);
+ Element tabEls = doc.getElementsByClass("presentation-projet").first();
+ if (tabEls != null) {
+ // One entry per <br>-separated chunk; entry 3 is read as the tags
+ String[] tab = tabEls.outerHtml().split("<br>");
+ List<String> tags = new ArrayList<String>();
+ for (String tag : getVal(tab, 3).split(" ")) {
+ // "".split(" ") yields one empty string and repeated spaces
+ // yield empty entries: those are not tags, skip them
+ if (!tag.isEmpty()) {
+ tags.add(tag);
+ }
}
+ return tags;
}
- return tags;
+ // No "presentation-projet" element: no tags
+ return new ArrayList<String>();
+
}
private String getDate() {
@Override
protected String getDesc() {
Element doc = getSourceNode();
- Elements tabEls = doc.getElementsByClass("projet-titre");
-
- String value = "";
- if (tabEls.size() >= 5) {
- value = StringUtils.unhtml(tabEls.get(4).text()).trim();
+ Element tabEls = doc.getElementsByClass("presentation-projet").first(); // first element carrying that class (may be null, see the check below)
+ if (tabEls != null) {
+ String[] tab = tabEls.outerHtml().split("<br>"); // one entry per <br>-separated chunk of the element's HTML
+ return getVal(tab, 4); // entry 4 is read as the description
}
- return value;
+ return ""; // no such element: empty description
}
private Image getCover() {
return null;
}
+ /**
+ * Extract the value stored in one entry of the given array.
+ * <p>
+ * The entry goes through {@link StringUtils#unhtml(String)}; if it then
+ * contains a colon, only the trimmed text after the first colon is kept.
+ *
+ * @param tab
+ * the entries to read from
+ * @param i
+ * the index of the entry to read
+ *
+ * @return the value, or an empty {@link String} if the index is past the
+ * end of the array
+ */
+ private String getVal(String[] tab, int i) {
+ // Index past the end: nothing to extract
+ if (i >= tab.length) {
+ return "";
+ }
+
+ String text = StringUtils.unhtml(tab[i]);
+ int colon = text.indexOf(":");
+
+ // Without a colon, the text is returned as-is (not trimmed)
+ return colon < 0 ? text : text.substring(colon + 1).trim();
+ }
+
@Override
protected List<Entry<String, URL>> getChapters(Progress pg)
throws IOException {
Element content = pageDoc.getElementById("content");
Elements linkEls = content.getElementsByTag("img");
for (Element linkEl : linkEls) {
- if (linkEl.attr("src").trim().isEmpty()) {
+ if (linkEl.absUrl("src").isEmpty()) {
continue;
}
builder.append("[");
- builder.append(linkEl.absUrl("src").trim());
+ builder.append(linkEl.absUrl("src"));
builder.append("]<br/>");
}