--- /dev/null
+package be.nikiroo.fanfix.searchable;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.data.MetaData;
+import be.nikiroo.fanfix.supported.BasicSupport;
+import be.nikiroo.fanfix.supported.SupportType;
+
+/**
+ * This class supports browsing through stories on the supported websites. It
+ * will fetch some {@link MetaData} that satisfy a search query or some tags if
+ * supported.
+ *
+ * @author niki
+ */
+public abstract class BasicSearchable {
+ private SupportType type;
+ private BasicSupport support;
+
+ /**
+ * Create a new {@link BasicSearchable} of the given type.
+ *
+ * @param type
+ * the type, must not be NULL
+ */
+ public BasicSearchable(SupportType type) {
+ setType(type);
+ support = BasicSupport.getSupport(getType(), null);
+ }
+
+ /**
+ * The support type.
+ *
+ * @return the type
+ */
+ public SupportType getType() {
+ return type;
+ }
+
+ /**
+ * The support type.
+ *
+ * @param type
+ * the new type
+ */
+ protected void setType(SupportType type) {
+ this.type = type;
+ }
+
+ /**
+ * The associated {@link BasicSupport}.
+ * <p>
+ * Mostly used to download content.
+ *
+ * @return the support
+ */
+ protected BasicSupport getSupport() {
+ return support;
+ }
+
+ /**
+ * Get a list of tags that can be browsed here.
+ *
+ * @return the list of tags
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ abstract public List<SearchableTag> getTags() throws IOException;
+
+ /**
+ * Fill the tag (set it 'complete') with more information from the support.
+ *
+ * @param tag
+ * the tag to fill
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ abstract protected void fillTag(SearchableTag tag) throws IOException;
+
+ /**
+ * Search for the given term and return a list of stories satisfying this
+ * search term.
+ * <p>
+ * Not that the returned stories will <b>NOT</b> be complete, but will only
+ * contain enough information to present them to the user and retrieve them.
+ * <p>
+ * URL is guaranteed to be usable, LUID will always be NULL.
+ *
+ * @param search
+ * the term to search for
+ *
+ * @return a list of stories that satisfy that search term
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ abstract public List<MetaData> search(String search) throws IOException;
+
+ /**
+ * Search for the given tag and return a list of stories satisfying this
+ * tag.
+ * <p>
+ * Not that the returned stories will <b>NOT</b> be complete, but will only
+ * contain enough information to present them to the user and retrieve them.
+ * <p>
+ * URL is guaranteed to be usable, LUID will always be NULL.
+ *
+ * @param tagId
+ * the tag to search for
+ * @param page
+ * the page to use for result pagination (see
+ * {@link SearchableTag#getPages()}, remember to check for -1),
+ * index is 1-based
+ *
+ * @return a list of stories that satisfy that search term
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ abstract public List<MetaData> search(SearchableTag tag, int page)
+ throws IOException;
+
+ /**
+ * Load a document from its url.
+ *
+ * @param url
+ * the URL to load
+ * @param stable
+ * TRUE for more stable resources, FALSE when they often change
+ *
+ * @return the document
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected Document load(String url, boolean stable) throws IOException {
+ return load(new URL(url), stable);
+ }
+
+ /**
+ * Load a document from its url.
+ *
+ * @param url
+ * the URL to load
+ * @param stable
+ * TRUE for more stable resources, FALSE when they often change
+ *
+ * @return the document
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected Document load(URL url, boolean stable) throws IOException {
+ return DataUtil.load(Instance.getCache().open(url, support, stable),
+ "UTF-8", url.toString());
+ }
+
+ /**
+ * Return a {@link BasicSearchable} implementation supporting the given
+ * type, or NULL if it does not exist.
+ *
+ * @param type
+ * the type, must not be NULL
+ *
+ * @return an implementation that supports it, or NULL
+ */
+ public static BasicSearchable getSearchable(SupportType type) {
+ BasicSearchable support = null;
+
+ switch (type) {
+ case FIMFICTION:
+ // TODO
+ break;
+ case FANFICTION:
+ support = new Fanfiction(type);
+ break;
+ case MANGAFOX:
+ // TODO
+ break;
+ case E621:
+ // TODO
+ break;
+ case YIFFSTAR:
+ // TODO
+ break;
+ case E_HENTAI:
+ // TODO
+ break;
+ case MANGA_LEL:
+ // TODO
+ break;
+ case CBZ:
+ case HTML:
+ case INFO_TEXT:
+ case TEXT:
+ case EPUB:
+ break;
+ }
+
+ return support;
+ }
+}
--- /dev/null
+package be.nikiroo.fanfix.searchable;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.bundles.StringId;
+import be.nikiroo.fanfix.data.MetaData;
+import be.nikiroo.fanfix.supported.SupportType;
+import be.nikiroo.utils.Image;
+import be.nikiroo.utils.StringUtils;
+
+/**
+ * A {@link BasicSearchable} for Fanfiction.NET.
+ *
+ * @author niki
+ */
+class Fanfiction extends BasicSearchable {
+ static private String BASE_URL = "http://fanfiction.net/";
+
+ /**
+ * Create a new {@link Fanfiction}.
+ *
+ * @param type
+ * {@link SupportType#FANFICTION}
+ */
+ public Fanfiction(SupportType type) {
+ super(type);
+ }
+
+ @Override
+ public List<SearchableTag> getTags() throws IOException {
+ String storiesName = null;
+ String crossoversName = null;
+ Map<String, String> stories = new HashMap<String, String>();
+ Map<String, String> crossovers = new HashMap<String, String>();
+
+ Document mainPage = load(BASE_URL, true);
+ Element menu = mainPage.getElementsByClass("dropdown").first();
+ if (menu != null) {
+ Element ul = menu.getElementsByClass("dropdown-menu").first();
+ if (ul != null) {
+ Map<String, String> currentList = null;
+ for (Element li : ul.getElementsByTag("li")) {
+ if (li.hasClass("disabled")) {
+ if (storiesName == null) {
+ storiesName = li.text();
+ currentList = stories;
+ } else {
+ crossoversName = li.text();
+ currentList = crossovers;
+ }
+ } else if (currentList != null) {
+ Element a = li.getElementsByTag("a").first();
+ if (a != null) {
+ currentList.put(a.absUrl("href"), a.text());
+ }
+ }
+ }
+ }
+ }
+
+ List<SearchableTag> tags = new ArrayList<SearchableTag>();
+
+ if (storiesName != null) {
+ SearchableTag tag = new SearchableTag(null, storiesName, false);
+ for (String id : stories.keySet()) {
+ tag.add(new SearchableTag(id, stories.get(id), true, false));
+ }
+ tags.add(tag);
+ }
+
+ if (crossoversName != null) {
+ SearchableTag tag = new SearchableTag(null, crossoversName, false);
+ for (String id : crossovers.keySet()) {
+ tag.add(new SearchableTag(id, crossovers.get(id), false, false));
+ }
+ tags.add(tag);
+ }
+
+ return tags;
+ }
+
+ @Override
+ protected void fillTag(SearchableTag tag) throws IOException {
+ if (tag.getId() == null || tag.isComplete()) {
+ return;
+ }
+
+ Document doc = load(tag.getId(), false);
+ Element list = doc.getElementById("list_output");
+ if (list != null) {
+ Element table = list.getElementsByTag("table").first();
+ if (table != null) {
+ for (Element div : table.getElementsByTag("div")) {
+ Element a = div.getElementsByTag("a").first();
+ Element span = div.getElementsByTag("span").first();
+
+ if (a != null) {
+ String subid = a.absUrl("href");
+ boolean crossoverSubtag = subid
+ .contains("/crossovers/");
+
+ SearchableTag subtag = new SearchableTag(subid,
+ a.text(), !crossoverSubtag, !crossoverSubtag);
+
+ tag.add(subtag);
+ if (span != null) {
+ String nr = span.text();
+ if (nr.startsWith("(")) {
+ nr = nr.substring(1);
+ }
+ if (nr.endsWith(")")) {
+ nr = nr.substring(0, nr.length() - 1);
+ }
+ nr = nr.trim();
+ subtag.setCount(toNumber(nr));
+ }
+ }
+ }
+ }
+ }
+
+ tag.setComplete(true);
+ }
+
+ /**
+ * @deprecated use {@link StringUtils} when updated
+ */
+ @Deprecated
+ private static long toNumber(String value) {
+ // TODO: use StringUtils instead after update
+ long count = 0l;
+ if (value != null) {
+ try {
+ if (value.toLowerCase().endsWith("m")) {
+ count = Long.parseLong(value.substring(0,
+ value.length() - 1).trim());
+ count *= 1000000;
+ } else if (value.toLowerCase().endsWith("k")) {
+ count = Long.parseLong(value.substring(0,
+ value.length() - 1).trim());
+ count *= 1000;
+ } else {
+ count = Long.parseLong(value);
+ }
+ } catch (NumberFormatException pe) {
+ }
+ }
+
+ return count;
+ }
+
+ @Override
+ public List<MetaData> search(String search) throws IOException {
+ String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8");
+ return getStories(BASE_URL + "search/?ready=1&type=story&keywords="
+ + encoded, null, null);
+ }
+
+ @Override
+ public List<MetaData> search(SearchableTag tag, int page)
+ throws IOException {
+ List<MetaData> metas = new ArrayList<MetaData>();
+
+ String url = tag.getId();
+ if (url != null) {
+ if (page > 1) {
+ int pos = url.indexOf("&p=");
+ if (pos >= 0) {
+ url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page
+ + "$2");
+ } else {
+ url += "&p=" + page;
+ }
+ }
+
+ Document doc = load(url, false);
+
+ // Update the pages number if needed
+ if (tag.getPages() < 0) {
+ tag.setPages(getPages(doc));
+ }
+
+ // Find out the full subjects (including parents)
+ String subjects = "";
+ for (SearchableTag t = tag; t != null; t = t.getParent()) {
+ if (!subjects.isEmpty()) {
+ subjects += ", ";
+ }
+ subjects += t.getName();
+ }
+
+ metas = getStories(url, doc, subjects);
+ }
+
+ return metas;
+ }
+
+ /**
+ * Return the number of pages in this stories result listing.
+ *
+ * @param doc
+ * the document
+ *
+ * @return the number of pages or -1 if unknown
+ *
+ * @throws IOException
+ * in case of I/O errors
+ */
+ private int getPages(Document doc) throws IOException {
+ int pages = -1;
+
+ if (doc != null) {
+ Element center = doc.getElementsByTag("center").first();
+ if (center != null) {
+ for (Element a : center.getElementsByTag("a")) {
+ if (a.absUrl("href").contains("&p=")) {
+ int thisLinkPages = -1;
+ try {
+ String[] tab = a.absUrl("href").split("=");
+ tab = tab[tab.length - 1].split("&");
+ thisLinkPages = Integer
+ .parseInt(tab[tab.length - 1]);
+ } catch (Exception e) {
+ }
+
+ pages = Math.max(pages, thisLinkPages);
+ }
+ }
+ }
+ }
+
+ return pages;
+ }
+
+ /**
+ * Fetch the stories from the given page.
+ *
+ * @param sourceUrl
+ * the url of the document
+ * @param doc
+ * the document to use (if NULL, will be loaded from
+ * <tt>sourceUrl</tt>)
+ * @param mainSubject
+ * the main subject (the anime/book/movie item related to the
+ * stories, like "MLP" or "Doctor Who"), or NULL if none
+ *
+ * @return the stories found in it
+ *
+ * @throws IOException
+ * in case of I/O errors
+ */
+ private List<MetaData> getStories(String sourceUrl, Document doc,
+ String mainSubject) throws IOException {
+ List<MetaData> metas = new ArrayList<MetaData>();
+
+ if (doc == null) {
+ doc = load(sourceUrl, false);
+ }
+
+ for (Element story : doc.getElementsByClass("z-list")) {
+ MetaData meta = new MetaData();
+ meta.setImageDocument(false);
+ meta.setSource(getType().getSourceName());
+
+ // Title, URL, Cover
+ Element stitle = story.getElementsByClass("stitle").first();
+ if (stitle != null) {
+ meta.setTitle(stitle.text());
+ meta.setUrl(stitle.absUrl("href"));
+ Element cover = stitle.getElementsByTag("img").first();
+ if (cover != null) {
+ // note: see data-original if needed?
+ String coverUrl = cover.absUrl("src");
+
+ try {
+ InputStream in = Instance.getCache().open(
+ new URL(coverUrl), getSupport(), true);
+ try {
+ meta.setCover(new Image(in));
+ } finally {
+ in.close();
+ }
+ } catch (Exception e) {
+ Instance.getTraceHandler()
+ .error(new Exception(
+ "Cannot download cover for Fanfiction story in search mode",
+ e));
+ }
+ }
+ }
+
+ // Author
+ Elements as = story.getElementsByTag("a");
+ if (as.size() > 1) {
+ meta.setAuthor(as.get(1).text());
+ }
+
+ // Tags (concatenated text), published date, updated date, Resume
+ String tags = "";
+ List<String> tagList = new ArrayList<String>();
+ Elements divs = story.getElementsByTag("div");
+ if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
+ String resume = divs.get(1).text();
+ if (divs.size() > 2) {
+ tags = divs.get(2).text();
+ resume = resume.substring(0,
+ resume.length() - tags.length()).trim();
+
+ for (Element d : divs.get(2).getElementsByAttribute(
+ "data-xutime")) {
+ String secs = d.attr("data-xutime");
+ try {
+ String date = new SimpleDateFormat("yyyy-MM-dd")
+ .format(new Date(
+ Long.parseLong(secs) * 1000));
+ // (updated, ) published
+ if (meta.getDate() != null) {
+ tagList.add("Updated: " + meta.getDate());
+ }
+ meta.setDate(date);
+ } catch (Exception e) {
+ }
+ }
+ }
+
+ meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0,
+ Instance.getTrans().getString(StringId.DESCRIPTION),
+ resume));
+ }
+
+ // How are the tags ordered?
+ // We have "Rated: xx", then the language, then all other tags
+ // If the subject(s) is/are present, they are before "Rated: xx"
+
+ // ////////////
+ // Examples: //
+ // ////////////
+
+ // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters:
+ // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.]
+
+ // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters:
+ // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7,
+ // Published: 4/2]
+
+ // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance,
+ // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1,
+ // Published: 9/1/2016]
+
+ boolean rated = false;
+ boolean isLang = false;
+ String subject = mainSubject == null ? "" : mainSubject;
+ String[] tab = tags.split(" *- *");
+ for (int i = 0; i < tab.length; i++) {
+ String tag = tab[i];
+ if (tag.startsWith("Rated: ")) {
+ rated = true;
+ }
+
+ if (!rated) {
+ if (!subject.isEmpty()) {
+ subject += ", ";
+ }
+ subject += tag;
+ } else if (isLang) {
+ meta.setLang(tag);
+ isLang = false;
+ } else {
+ if (tag.contains(":")) {
+ // Handle special tags:
+ if (tag.startsWith("Words: ")) {
+ try {
+ meta.setWords(Long.parseLong(tag
+ .substring("Words: ".length())
+ .replace(",", "").trim()));
+ } catch (Exception e) {
+ }
+ } else if (tag.startsWith("Rated: ")) {
+ tagList.add(tag);
+ }
+ } else {
+ // Normal tags are "/"-separated
+ for (String t : tag.split("/")) {
+ tagList.add(t);
+ }
+ }
+
+ if (tag.startsWith("Rated: ")) {
+ isLang = true;
+ }
+ }
+ }
+
+ meta.setSubject(subject);
+ meta.setTags(tagList);
+
+ metas.add(meta);
+ }
+
+ return metas;
+ }
+}
--- /dev/null
+package be.nikiroo.fanfix.searchable;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class represents a tag that can be searched on a supported website.
+ * <p>
+ * Tags form a tree: each tag knows its parent and its subtag children.
+ *
+ * @author niki
+ */
+public class SearchableTag {
+    private String id;
+    private String name;
+    private boolean complete;
+    private long count;
+
+    private SearchableTag parent;
+    private final List<SearchableTag> children;
+
+    /**
+     * The number of stories result pages this tag can get.
+     * <p>
+     * We keep more information than what the getter/setter returns/accepts.
+     * <ul>
+     * <li>-2: this tag does not support stories results (not a leaf tag)</li>
+     * <li>-1: the number is not yet known, but will be known after a
+     * {@link BasicSearchable#search(SearchableTag, int)} operation</li>
+     * <li>X: the number of pages</li>
+     * </ul>
+     */
+    private int pages;
+
+    /**
+     * Create a new {@link SearchableTag}.
+     * <p>
+     * Note that tags are complete by default.
+     *
+     * @param id
+     *            the ID (usually a way to find the linked stories later on)
+     * @param name
+     *            the tag name, which can be displayed to the user
+     * @param leaf
+     *            the tag is a leaf tag, that is, it will not return subtags
+     *            with {@link BasicSearchable#fillTag(SearchableTag)} but will
+     *            return stories with
+     *            {@link BasicSearchable#search(SearchableTag, int)}
+     */
+    public SearchableTag(String id, String name, boolean leaf) {
+        this(id, name, leaf, true);
+    }
+
+    /**
+     * Create a new {@link SearchableTag}.
+     *
+     * @param id
+     *            the ID (usually a way to find the linked stories later on)
+     * @param name
+     *            the tag name, which can be displayed to the user
+     * @param leaf
+     *            the tag is a leaf tag, that is, it will not return subtags
+     *            with {@link BasicSearchable#fillTag(SearchableTag)} but will
+     *            return stories with
+     *            {@link BasicSearchable#search(SearchableTag, int)}
+     * @param complete
+     *            the tag {@link SearchableTag#isComplete()} or not
+     */
+    public SearchableTag(String id, String name, boolean leaf, boolean complete) {
+        this.id = id;
+        this.name = name;
+        this.complete = complete;
+
+        // Same as setLeaf(leaf), but without calling an overridable method
+        // from the constructor
+        this.pages = leaf ? -1 : -2;
+
+        this.children = new ArrayList<SearchableTag>();
+    }
+
+    /**
+     * The ID (usually a way to find the linked stories later on).
+     *
+     * @return the ID
+     */
+    public String getId() {
+        return id;
+    }
+
+    /**
+     * The tag name, which can be displayed to the user.
+     *
+     * @return the name
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Non-complete, non-leaf tags can still be completed via a
+     * {@link BasicSearchable#fillTag(SearchableTag)} operation from a
+     * {@link BasicSearchable}, in order to gain (more?) subtag children.
+     * <p>
+     * This method does not make sense for leaf tags.
+     *
+     * @return TRUE if it is complete
+     */
+    public boolean isComplete() {
+        return complete;
+    }
+
+    /**
+     * Non-complete, non-leaf tags can still be completed via a
+     * {@link BasicSearchable#fillTag(SearchableTag)} operation from a
+     * {@link BasicSearchable}, in order to gain (more?) subtag children.
+     * <p>
+     * This method does not make sense for leaf tags.
+     *
+     * @param complete
+     *            TRUE if it is complete
+     */
+    public void setComplete(boolean complete) {
+        this.complete = complete;
+    }
+
+    /**
+     * The number of items that can be found with this tag if it is searched.
+     * <p>
+     * Will report the number of subtags if no (strictly positive) count has
+     * been set.
+     *
+     * @return the number of items
+     */
+    public long getCount() {
+        if (count > 0) {
+            return count;
+        }
+
+        return children.size();
+    }
+
+    /**
+     * The number of items that can be found with this tag if it is searched.
+     *
+     * @param count
+     *            the new count
+     */
+    public void setCount(long count) {
+        this.count = count;
+    }
+
+    /**
+     * The number of stories result pages this tag contains, only make sense if
+     * {@link SearchableTag#isLeaf()} returns TRUE.
+     * <p>
+     * Will return -1 if the number is not yet known (which is always the case
+     * for non-leaf tags).
+     *
+     * @return the number of pages, or -1
+     */
+    public int getPages() {
+        return Math.max(-1, pages);
+    }
+
+    /**
+     * The number of stories result pages this tag contains, only make sense if
+     * {@link SearchableTag#isLeaf()} returns TRUE.
+     * <p>
+     * This call is ignored on non-leaf tags, so that it cannot silently turn
+     * them into leaf tags (use {@link SearchableTag#setLeaf(boolean)} for
+     * that).
+     *
+     * @param pages
+     *            the (positive or 0) number of pages, anything negative
+     *            meaning "not yet known"
+     */
+    public void setPages(int pages) {
+        if (this.pages > -2) {
+            this.pages = Math.max(-1, pages);
+        }
+    }
+
+    /**
+     * This tag is a leaf tag, that is, it will not return other subtags with
+     * {@link BasicSearchable#fillTag(SearchableTag)} but will return stories
+     * with {@link BasicSearchable#search(SearchableTag, int)}.
+     *
+     * @return TRUE if it is
+     */
+    public boolean isLeaf() {
+        return pages > -2;
+    }
+
+    /**
+     * This tag is a leaf tag, that is, it will not return other subtags with
+     * {@link BasicSearchable#fillTag(SearchableTag)} but will return stories
+     * with {@link BasicSearchable#search(SearchableTag, int)}.
+     * <p>
+     * Will reset the number of pages (to "not yet known" for a leaf tag).
+     *
+     * @param leaf
+     *            TRUE if it is
+     */
+    public void setLeaf(boolean leaf) {
+        pages = leaf ? -1 : -2;
+    }
+
+    /**
+     * The subtag children of this {@link SearchableTag}.
+     * <p>
+     * Never NULL.
+     * <p>
+     * Note that if {@link SearchableTag#isComplete()} returns false, you can
+     * still fill (more?) subtag children with a {@link BasicSearchable}.
+     * <p>
+     * This is the live list, not a copy; use
+     * {@link SearchableTag#add(SearchableTag)} to add children so that their
+     * parent is set.
+     *
+     * @return the subtag children, never NULL
+     */
+    public List<SearchableTag> getChildren() {
+        return children;
+    }
+
+    /**
+     * Add the given {@link SearchableTag} as a subtag child, and set this
+     * {@link SearchableTag} as its parent.
+     *
+     * @param tag
+     *            the tag to add, must not be NULL
+     *
+     * @throws NullPointerException
+     *             if the tag is NULL (in which case nothing is added)
+     */
+    public void add(SearchableTag tag) {
+        // Check first: a NULL must not end up in the list of children
+        if (tag == null) {
+            throw new NullPointerException("tag");
+        }
+
+        tag.parent = this;
+        children.add(tag);
+    }
+
+    /**
+     * This {@link SearchableTag} parent tag, or NULL if none.
+     *
+     * @return the parent or NULL
+     */
+    public SearchableTag getParent() {
+        return parent;
+    }
+
+    /**
+     * Display a DEBUG {@link String} representation of this object.
+     * <p>
+     * Format: <tt>name [id]</tt>, a <tt>*</tt> if not complete, the count
+     * between parenthesis if not 0, then the (10 first) children.
+     */
+    @Override
+    public String toString() {
+        StringBuilder rep = new StringBuilder();
+        rep.append(name).append(" [").append(id).append("]");
+        if (!complete) {
+            rep.append("*");
+        }
+
+        long total = getCount();
+        if (total > 0) {
+            rep.append(" (").append(total).append(")");
+        }
+
+        if (!children.isEmpty()) {
+            StringBuilder tags = new StringBuilder();
+            int i = 1;
+            for (SearchableTag tag : children) {
+                if (tags.length() > 0) {
+                    tags.append(", ");
+                }
+
+                // Only list the 10 first children
+                if (i > 10) {
+                    tags.append("...");
+                    break;
+                }
+
+                tags.append(tag);
+                i++;
+            }
+
+            rep.append(": ").append(tags);
+        }
+
+        return rep.toString();
+    }
+}