package be.nikiroo.fanfix.supported;
-import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
-import java.util.Scanner;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.data.MetaData;
-import be.nikiroo.utils.IOUtils;
+import be.nikiroo.utils.Image;
+import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;
class MangaFox extends BasicSupport {
}
@Override
- public String getSourceName() {
- return "MangaFox.me";
- }
-
- @Override
- protected MetaData getMeta(URL source, InputStream in) throws IOException {
+ protected MetaData getMeta() throws IOException {
MetaData meta = new MetaData();
+ Element doc = getSourceNode();
+
+ Element title = doc.getElementById("title");
+ Elements table = null;
+ if (title != null) {
+ table = title.getElementsByTag("table");
+ }
+ if (table != null) {
+ // Rows: header, data
+ Elements rows = table.first().getElementsByTag("tr");
+ if (rows.size() > 1) {
+ table = rows.get(1).getElementsByTag("td");
+ // Columns: Realeased, Authors, Artists, Genres
+ if (table.size() < 4) {
+ table = null;
+ }
+ }
+ }
- meta.setTitle(getTitle(reset(in)));
- meta.setAuthor(getAuthor(reset(in)));
- meta.setDate(getDate(reset(in)));
- meta.setTags(getTags(reset(in)));
- meta.setSource(getSourceName());
- meta.setUrl(source.toString());
- meta.setPublisher(getSourceName());
- meta.setUuid(source.toString());
+ meta.setTitle(getTitle());
+ if (table != null) {
+ meta.setAuthor(getAuthors(table.get(1).text() + ","
+ + table.get(2).text()));
+
+ meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
+ meta.setTags(explode(table.get(3).text()));
+ }
+ meta.setSource(getType().getSourceName());
+ meta.setUrl(getSource().toString());
+ meta.setPublisher(getType().getSourceName());
+ meta.setUuid(getSource().toString());
meta.setLuid("");
- meta.setLang("EN");
+ meta.setLang("en");
meta.setSubject("manga");
meta.setType(getType().toString());
meta.setImageDocument(true);
- meta.setCover(getCover(reset(in)));
+ meta.setCover(getCover());
return meta;
}
- private List<String> getTags(InputStream in) {
- List<String> tags = new ArrayList<String>();
-
- String line = getLine(in, "/genres/", 0);
- if (line != null) {
- line = StringUtils.unhtml(line);
- String[] tab = line.split(",");
- if (tab != null) {
- for (String tag : tab) {
- tags.add(tag.trim());
- }
- }
- }
-
- return tags;
- }
-
- private String getTitle(InputStream in) {
- String line = getLine(in, " property=\"og:title\"", 0);
- if (line != null) {
- int pos = -1;
- for (int i = 0; i < 3; i++) {
- pos = line.indexOf('"', pos + 1);
- }
+ /**
+ * Return the title of the manga, read from the first "h1" tag found
+ * inside the element whose id is "title".
+ *
+ * @return the trimmed title in text format, or NULL if the page has no
+ * such element or heading
+ */
+ private String getTitle() {
+ Element doc = getSourceNode();
- if (pos >= 0) {
- line = line.substring(pos + 1);
- pos = line.indexOf('"');
- if (pos >= 0) {
- return line.substring(0, pos);
- }
- }
+ Element title = doc.getElementById("title");
+ if (title == null) {
+ // no title block on this page: nothing to read the heading from
+ return null;
+ }
+
+ Element h1 = title.getElementsByTag("h1").first();
+ if (h1 != null) {
+ return StringUtils.unhtml(h1.text()).trim();
}
return null;
}
- private String getAuthor(InputStream in) {
- List<String> authors = new ArrayList<String>();
-
- String line = getLine(in, "/author/", 0, false);
- if (line != null) {
- for (String ln : StringUtils.unhtml(line).split(",")) {
- if (ln != null && !ln.trim().isEmpty()
- && !authors.contains(ln.trim())) {
- authors.add(ln.trim());
- }
- }
- }
-
- try {
- in.reset();
- } catch (IOException e) {
- Instance.syserr(e);
- }
-
- line = getLine(in, "/artist/", 0, false);
- if (line != null) {
- for (String ln : StringUtils.unhtml(line).split(",")) {
- if (ln != null && !ln.trim().isEmpty()
- && !authors.contains(ln.trim())) {
- authors.add(ln.trim());
- }
- }
- }
-
- if (authors.isEmpty()) {
- return null;
- } else {
- StringBuilder builder = new StringBuilder();
- for (String author : authors) {
- if (builder.length() > 0) {
- builder.append(", ");
- }
-
- builder.append(author);
/**
* Build the author string from a comma-separated list of names.
* <p>
* The list is split with {@link #explode(String)} and the resulting names
* are joined with ", ".
*
* @param authorList
* the comma-separated names
*
* @return the joined names, or an empty String if the list contains no name
* (the removed getAuthor(InputStream) returned NULL in that case)
*/
+ private String getAuthors(String authorList) {
+ String author = "";
+ for (String auth : explode(authorList)) {
+ if (!author.isEmpty()) {
+ author = author + ", ";
}
-
- return builder.toString();
- }
- }
-
- private String getDate(InputStream in) {
- String line = getLine(in, "/released/", 0);
- if (line != null) {
- line = StringUtils.unhtml(line);
- return line.trim();
+ author += auth;
}
- return null;
+ return author;
}
/**
* Return the description of the manga, taken from the text of the first
* element of class "summary" in the source page.
*
* @return the trimmed description, or NULL if there is no such element
*/
@Override
- protected String getDesc(URL source, InputStream in) {
- String line = getLine(in, " property=\"og:description\"", 0);
- if (line != null) {
- int pos = -1;
- for (int i = 0; i < 3; i++) {
- pos = line.indexOf('"', pos + 1);
- }
-
- if (pos >= 0) {
- line = line.substring(pos + 1);
- pos = line.indexOf('"');
- if (pos >= 0) {
- return line.substring(0, pos);
- }
- }
+ protected String getDesc() {
+ Element doc = getSourceNode();
+ Element title = doc.getElementsByClass("summary").first();
+ if (title != null) {
+ return StringUtils.unhtml(title.text()).trim();
}
return null;
}
- private BufferedImage getCover(InputStream in) {
- String line = getLine(in, " property=\"og:image\"", 0);
- String cover = null;
- if (line != null) {
- int pos = -1;
- for (int i = 0; i < 3; i++) {
- pos = line.indexOf('"', pos + 1);
- }
-
- if (pos >= 0) {
- line = line.substring(pos + 1);
- pos = line.indexOf('"');
- if (pos >= 0) {
- cover = line.substring(0, pos);
- }
- }
/**
* Download the cover of the manga.
* <p>
* The cover is the first "img" tag found inside the first element of class
* "cover"; its "src" attribute is resolved to an absolute URL, opened with
* {@link #openEx(String)} and wrapped into an {@link Image}. The stream is
* always closed, and an {@link IOException} raised on the way is sent to
* the trace handler instead of being thrown.
* <p>
* NOTE(review): the statement executed when no cover is found (or after a
* failure) is not visible in this excerpt -- presumably a "return null",
* TODO confirm.
*
* @return the cover image when it could be downloaded
*/
+ private Image getCover() {
+ Element doc = getSourceNode();
+ Element cover = doc.getElementsByClass("cover").first();
+ if (cover != null) {
+ cover = cover.getElementsByTag("img").first();
}
if (cover != null) {
+ String coverUrl = cover.absUrl("src");
+
InputStream coverIn;
try {
- coverIn = openEx(cover);
+ coverIn = openEx(coverUrl);
try {
- return IOUtils.toImage(coverIn);
+ return new Image(coverIn);
} finally {
coverIn.close();
}
} catch (IOException e) {
+ Instance.getTraceHandler().error(e);
}
}
}
/**
* List the chapters of the manga as (name, URL) entries.
* <p>
* Every "li" tag that holds an "h4" (or, failing that, an "h3") containing
* a link is taken as a chapter: its name is the text of the heading and its
* URL is the absolute link, without a trailing "1.html" and always ending
* with "/".
* <p>
* While scanning, the text of each link is stripped of its trailing digits,
* dots and spaces; if what remains is the same (ignoring case) for all the
* chapters, it is used as a common prefix: the chapters are then renamed
* "Tome N" or "Tome N: name" and sorted by their number N. In all the other
* cases (no common prefix, or a number that cannot be parsed), the list is
* simply reversed.
*
* @param pg
* the progress reporter (not used by the code visible here)
*
* @return the chapters, in reading order
*/
@Override
- protected List<Entry<String, URL>> getChapters(URL source, InputStream in) {
+ protected List<Entry<String, URL>> getChapters(Progress pg) {
List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
- String volumeAt = "<h3 class=\"volume\">";
- String linkAt = "href=\"http://mangafox.me/";
- String endAt = "<script type=\"text/javascript\">";
+ String prefix = null; // each chapter starts with this prefix, then a
+ // chapter number (including "x.5"), then name
- boolean started = false;
+ Element doc = getSourceNode();
+ for (Element li : doc.getElementsByTag("li")) {
+ Element el = li.getElementsByTag("h4").first();
+ if (el == null) {
+ el = li.getElementsByTag("h3").first();
+ }
+ if (el != null) {
+ Element a = el.getElementsByTag("a").first();
+ if (a != null) {
+ String title = StringUtils.unhtml(el.text()).trim();
+ try {
+ String url = a.absUrl("href");
+ if (url.endsWith("1.html")) {
+ url = url.substring(0,
+ url.length() - "1.html".length());
+ }
+ if (!url.endsWith("/")) {
+ url += "/";
+ }
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- scan.useDelimiter("\\n");
- while (scan.hasNext()) {
- String line = scan.next();
// prefix states: null = not computed yet, "" = the chapters do not
// share a prefix (no need to look any further), other = candidate
+ if (prefix == null || !prefix.isEmpty()) {
+ StringBuilder possiblePrefix = new StringBuilder(
+ StringUtils.unhtml(a.text()).trim());
+ while (possiblePrefix.length() > 0) {
+ char car = possiblePrefix.charAt(possiblePrefix
+ .length() - 1);
+ boolean punctuation = (car == '.' || car == ' ');
+ boolean digit = (car >= '0' && car <= '9');
+ if (!punctuation && !digit) {
+ break;
+ }
+
+ possiblePrefix.setLength(possiblePrefix
+ .length() - 1);
+ }
+
+ if (prefix == null) {
+ prefix = possiblePrefix.toString();
+ }
+
+ if (!prefix.equalsIgnoreCase(possiblePrefix
+ .toString())) {
+ prefix = ""; // prefix not ok
+ }
+ }
- if (started && line.contains(endAt)) {
- break;
- } else if (!started && line.contains(volumeAt)) {
- started = true;
+ urls.add(new AbstractMap.SimpleEntry<String, URL>(
+ title, new URL(url)));
+ } catch (Exception e) {
+ Instance.getTraceHandler().error(e);
+ }
+ }
}
+ }
- if (started && line.contains(linkAt)) {
- // Chapter content url
- String url = null;
- int pos = line.indexOf("href=\"");
- if (pos >= 0) {
- line = line.substring(pos + "href=\"".length());
- pos = line.indexOf('\"');
+ if (prefix != null && !prefix.isEmpty()) {
+ try {
+ // We found a prefix, so everything should be sortable
+ SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
+ for (Entry<String, URL> entry : urls) {
// NOTE(review): the prefix comes from the text of the link (a.text())
// but is cut here from the text of the heading (el.text()); if the
// two differ, substring() can throw a StringIndexOutOfBoundsException,
// which the catch below does not handle -- TODO confirm
+ String num = entry.getKey().substring(prefix.length() + 1)
+ .trim();
+ String name = "";
+ int pos = num.indexOf(' ');
if (pos >= 0) {
- url = line.substring(0, pos);
+ name = num.substring(pos).trim();
+ num = num.substring(0, pos).trim();
}
- }
- // Chapter name
- String name = null;
- if (scan.hasNext()) {
- name = StringUtils.unhtml(scan.next()).trim();
- // Remove the "new" tag if present
- if (name.endsWith("new")) {
- name = name.substring(0, name.length() - 3).trim();
+ if (!name.isEmpty()) {
+ name = "Tome " + num + ": " + name;
+ } else {
+ name = "Tome " + num;
}
- }
- // to help with the retry and the originalUrl
- refresh(url);
+ double key = Double.parseDouble(num);
- try {
- final String key = name;
- final URL value = new URL(url);
- urls.add(new Entry<String, URL>() {
- public URL setValue(URL value) {
- return null;
- }
-
- public String getKey() {
- return key;
- }
-
- public URL getValue() {
- return value;
- }
- });
- } catch (MalformedURLException e) {
- Instance.syserr(e);
// NOTE(review): the number is the key of the map, so a chapter with
// the same number as a previous one replaces it in the result
+ map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
+ entry.getValue()));
}
+ urls = new ArrayList<Entry<String, URL>>(map.values());
+ } catch (NumberFormatException e) {
+ Instance.getTraceHandler()
+ .error(new IOException(
+ "Cannot find a tome number, revert to default sorting",
+ e));
+ // by default, the chapters are in reversed order
+ Collections.reverse(urls);
}
+ } else {
+ // by default, the chapters are in reversed order
+ Collections.reverse(urls);
}
- // the chapters are in reversed order
- Collections.reverse(urls);
-
return urls;
}
+ /**
+ * Build the content of a chapter: one "[image URL]&lt;br/&gt;" entry per
+ * page of the chapter.
+ * <p>
+ * The pages are the documents "1.html", "2.html"... located under the URL
+ * of the chapter. The first one is also used to count the pages (the number
+ * of options of its page selector, minus the last one which is
+ * "Comments"). A page that cannot be downloaded, or that has no usable
+ * image link, is reported to the trace handler or skipped; it does not stop
+ * the process. If the first page cannot be read at all, the chapter is
+ * returned empty.
+ *
+ * @param chapUrl
+ * the URL of the chapter (the page names are appended to it)
+ * @param number
+ * the chapter number (not used by this implementation)
+ * @param pg
+ * the optional progress reporter, can be NULL
+ *
+ * @return the content of the chapter
+ *
+ * @throws IOException
+ * in case of I/O error (for instance, when closing a stream)
+ */
@Override
- protected String getChapterContent(URL source, InputStream in, int number) {
- StringBuilder builder = new StringBuilder();
- String base = getCurrentReferer().toString();
- int pos = base.lastIndexOf('/');
- base = base.substring(0, pos + 1); // including the '/' at the end
+ protected String getChapterContent(URL chapUrl, int number, Progress pg)
+ throws IOException {
+ if (pg == null) {
+ pg = new Progress();
+ }
- boolean close = false;
- while (in != null) {
- String linkNextLine = getLine(in, "return enlarge()", 0);
- try {
- in.reset();
- } catch (IOException e) {
- Instance.syserr(e);
- }
+ StringBuilder builder = new StringBuilder();
- String linkImageLine = getLine(in, "return enlarge()", 1);
- String linkNext = null;
- String linkImage = null;
- pos = linkNextLine.indexOf("href=\"");
- if (pos >= 0) {
- linkNextLine = linkNextLine.substring(pos + "href=\"".length());
- pos = linkNextLine.indexOf('\"');
- if (pos >= 0) {
- linkNext = linkNextLine.substring(0, pos);
- }
- }
- pos = linkImageLine.indexOf("src=\"");
- if (pos >= 0) {
- linkImageLine = linkImageLine
- .substring(pos + "src=\"".length());
- pos = linkImageLine.indexOf('\"');
- if (pos >= 0) {
- linkImage = linkImageLine.substring(0, pos);
- }
- }
+ String url = chapUrl.toString();
+ InputStream imageIn = null;
+ Element imageDoc = null;
- if (linkImage != null) {
- builder.append("[");
- // to help with the retry and the originalUrl, part 1
- builder.append(withoutQuery(linkImage));
- builder.append("]<br/>");
+ // 1. find out how many images there are
+ int size = 0;
+ try {
+ // note: when used, the base URL can be an ad-page
+ imageIn = openEx(url + "1.html");
+ imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
+ } catch (IOException e) {
+ Instance.getTraceHandler().error(
+ new IOException("Cannot get image " + 1 + " of manga", e));
+ } finally {
+ if (imageIn != null) {
+ imageIn.close();
}
+ }
+ // imageDoc is still null if the first page could not be read, and the
+ // page selector can be missing: the chapter is then considered empty
+ Element select = null;
+ if (imageDoc != null) {
+ select = imageDoc.getElementsByClass("m").first();
+ }
+ if (select != null) {
+ Elements options = select.getElementsByTag("option");
+ size = Math.max(0, options.size() - 1); // last is "Comments"
+ }
- // to help with the retry and the originalUrl, part 2
- refresh(linkImage);
+ pg.setMinMax(0, size);
- if (close) {
+ // 2. list them
+ for (int i = 1; i <= size; i++) {
try {
- in.close();
- } catch (IOException e) {
- Instance.syserr(e);
- }
- }
+ // only the download is skipped for the first page (it was
+ // already opened for size), its image must still be listed
+ if (i > 1) {
+ imageIn = openEx(url + i + ".html");
+ imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
+ + ".html");
+ }
+
+ Element image = imageDoc.getElementById("image");
+ // absUrl() returns an empty String when there is no usable link
+ String linkImage = image == null ? "" : image.absUrl("src");
+ if (!linkImage.isEmpty()) {
+ builder.append("[");
+ // to help with the retry and the originalUrl, part 1
+ builder.append(withoutQuery(linkImage));
+ builder.append("]<br/>");
+
+ // to help with the retry and the originalUrl, part 2
+ refresh(linkImage);
+ }
- in = null;
- if (linkNext != null && !"javascript:void(0);".equals(linkNext)) {
- URL url;
- try {
- url = new URL(base + linkNext);
- in = openEx(base + linkNext);
- setCurrentReferer(url);
} catch (IOException e) {
- Instance.syserr(new IOException(
- "Cannot get the next manga page which is: "
- + linkNext, e));
+ Instance.getTraceHandler().error(
+ new IOException("Cannot get image " + i
+ + " of manga", e));
+ } finally {
+ if (imageIn != null) {
+ imageIn.close();
}
}
-
- close = true;
}
- setCurrentReferer(source);
return builder.toString();
}
- @Override
- protected boolean supports(URL url) {
- return "mangafox.me".equals(url.getHost())
- || "www.mangafox.me".equals(url.getHost());
- }
-
/**
* Open the given URL through the cache, with a second chance on failure.
* <p>
* The stream is requested from {@link Instance#getCache()} with the URL and
* its withoutQuery(url) form; if this first attempt throws any
* {@link Exception}, the very same request is made a second time and, this
* time, a failure is passed on to the caller.
* <p>
* NOTE(review): this comment previously read "Refresh the {@link URL} by
* calling {@link MangaFox#openEx(String)}.", which looks like the
* description of a separate refresh helper whose code is not part of this
* excerpt -- TODO confirm and move it back there if needed.
*
* @param url
* the URL to open
*
* @return the stream to read the content from
*
* @throws IOException
* declared by the signature (presumably raised when the second
* attempt fails too -- TODO confirm)
*/
private InputStream openEx(String url) throws IOException {
try {
- return Instance.getCache().open(new URL(url), this, true,
- withoutQuery(url));
+ return Instance.getCache().open(new URL(url), withoutQuery(url),
+ this, true);
} catch (Exception e) {
// second chance
// NOTE(review): the body of the try below is empty in this excerpt; a
// statement able to throw an InterruptedException (presumably a short
// pause before the retry) seems to be missing -- TODO confirm
try {
} catch (InterruptedException ee) {
}
- return Instance.getCache().open(new URL(url), this, true,
- withoutQuery(url));
+ return Instance.getCache().open(new URL(url), withoutQuery(url),
+ this, true);
}
}
return null;
}
}
+
+ /**
+ * Split a comma-separated HTML list into its distinct, non-empty values.
+ * <p>
+ * Each value is converted from HTML to text and trimmed; the values are
+ * returned in the order of their first appearance.
+ *
+ * @param values
+ * the comma-separated values in HTML format (can be NULL or
+ * empty)
+ *
+ * @return the values in text format, without duplicates (never NULL)
+ */
+ private List<String> explode(String values) {
+ List<String> result = new ArrayList<String>();
+ if (values == null || values.isEmpty()) {
+ return result;
+ }
+
+ for (String rawValue : values.split(",")) {
+ String text = StringUtils.unhtml(rawValue).trim();
+ if (text.isEmpty() || result.contains(text)) {
+ // blank or already known: skip it
+ continue;
+ }
+
+ result.add(text);
+ }
+
+ return result;
+ }
+
+ /**
+ * Check if the given {@link URL} points to one of the hosts handled by
+ * this class: "mangafox.me" or "fanfox.net", with or without a leading
+ * "www.".
+ *
+ * @param url
+ * the URL to check
+ *
+ * @return TRUE if the host of the URL is one of those
+ */
+ @Override
+ protected boolean supports(URL url) {
+ String host = url.getHost();
+ if ("mangafox.me".equals(host) || "www.mangafox.me".equals(host)) {
+ return true;
+ }
+
+ return "fanfox.net".equals(host) || "www.fanfox.net".equals(host);
+ }
}