From: Niki Roo Date: Thu, 11 Apr 2019 05:16:16 +0000 (+0200) Subject: Merge branch 'search' X-Git-Url: https://git.nikiroo.be/?a=commitdiff_plain;h=3172c8cf7cdb9f9bf90c4515b8494ae84e9527ff;hp=4ff0b1a910b2c72ef133ba65a1bceb4be0932c87;p=fanfix.git Merge branch 'search' --- diff --git a/src/be/nikiroo/fanfix/reader/ui/GuiReaderBookInfo.java b/src/be/nikiroo/fanfix/reader/ui/GuiReaderBookInfo.java index 23d4c31..eb6dfbf 100644 --- a/src/be/nikiroo/fanfix/reader/ui/GuiReaderBookInfo.java +++ b/src/be/nikiroo/fanfix/reader/ui/GuiReaderBookInfo.java @@ -5,6 +5,7 @@ import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.fanfix.data.Story; import be.nikiroo.fanfix.library.BasicLibrary; import be.nikiroo.utils.Image; +import be.nikiroo.utils.StringUtils; /** * Some meta information related to a "book" (which can either be a @@ -220,8 +221,12 @@ public class GuiReaderBookInfo { * the number to parse * * @return the displayable version of the number + * + * @deprecated use {@link StringUtils} after update instead */ + @Deprecated static private String formatNumber(long number) { + // TODO: replace with StringUtils after update String displayNumber; if (number >= 4000) { displayNumber = "" + (number / 1000) + "k"; diff --git a/src/be/nikiroo/fanfix/searchable/BasicSearchable.java b/src/be/nikiroo/fanfix/searchable/BasicSearchable.java new file mode 100644 index 0000000..c639443 --- /dev/null +++ b/src/be/nikiroo/fanfix/searchable/BasicSearchable.java @@ -0,0 +1,210 @@ +package be.nikiroo.fanfix.searchable; + +import java.io.IOException; +import java.net.URL; +import java.util.List; + +import org.jsoup.helper.DataUtil; +import org.jsoup.nodes.Document; + +import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.data.MetaData; +import be.nikiroo.fanfix.supported.BasicSupport; +import be.nikiroo.fanfix.supported.SupportType; + +/** + * This class supports browsing through stories on the supported websites. 
It + * will fetch some {@link MetaData} that satisfy a search query or some tags if + * supported. + * + * @author niki + */ +public abstract class BasicSearchable { + private SupportType type; + private BasicSupport support; + + /** + * Create a new {@link BasicSearchable} of the given type. + * + * @param type + * the type, must not be NULL + */ + public BasicSearchable(SupportType type) { + setType(type); + support = BasicSupport.getSupport(getType(), null); + } + + /** + * The support type. + * + * @return the type + */ + public SupportType getType() { + return type; + } + + /** + * The support type. + * + * @param type + * the new type + */ + protected void setType(SupportType type) { + this.type = type; + } + + /** + * The associated {@link BasicSupport}. + *

+ * Mostly used to download content. + * + * @return the support + */ + protected BasicSupport getSupport() { + return support; + } + + /** + * Get a list of tags that can be browsed here. + * + * @return the list of tags + * + * @throws IOException + * in case of I/O error + */ + abstract public List getTags() throws IOException; + + /** + * Fill the tag (set it 'complete') with more information from the support. + * + * @param tag + * the tag to fill + * + * @throws IOException + * in case of I/O error + */ + abstract protected void fillTag(SearchableTag tag) throws IOException; + + /** + * Search for the given term and return a list of stories satisfying this + * search term. + *

+ * Note that the returned stories will NOT be complete, but will only + * contain enough information to present them to the user and retrieve them. + *

+ * URL is guaranteed to be usable, LUID will always be NULL. + * + * @param search + * the term to search for + * + * @return a list of stories that satisfy that search term + * + * @throws IOException + * in case of I/O error + */ + abstract public List search(String search) throws IOException; + + /** + * Search for the given tag and return a list of stories satisfying this + * tag. + *

+ * Note that the returned stories will NOT be complete, but will only + * contain enough information to present them to the user and retrieve them. + *

+ * URL is guaranteed to be usable, LUID will always be NULL. + * + * @param tagId + * the tag to search for + * @param page + * the page to use for result pagination (see + * {@link SearchableTag#getPages()}, remember to check for -1), + * index is 1-based + * + * @return a list of stories that satisfy that search term + * + * @throws IOException + * in case of I/O error + */ + abstract public List search(SearchableTag tag, int page) + throws IOException; + + /** + * Load a document from its url. + * + * @param url + * the URL to load + * @param stable + * TRUE for more stable resources, FALSE when they often change + * + * @return the document + * + * @throws IOException + * in case of I/O error + */ + protected Document load(String url, boolean stable) throws IOException { + return load(new URL(url), stable); + } + + /** + * Load a document from its url. + * + * @param url + * the URL to load + * @param stable + * TRUE for more stable resources, FALSE when they often change + * + * @return the document + * + * @throws IOException + * in case of I/O error + */ + protected Document load(URL url, boolean stable) throws IOException { + return DataUtil.load(Instance.getCache().open(url, support, stable), + "UTF-8", url.toString()); + } + + /** + * Return a {@link BasicSearchable} implementation supporting the given + * type, or NULL if it does not exist. 
+ * + * @param type + * the type, must not be NULL + * + * @return an implementation that supports it, or NULL + */ + public static BasicSearchable getSearchable(SupportType type) { + BasicSearchable support = null; + + switch (type) { + case FIMFICTION: + // TODO + break; + case FANFICTION: + support = new Fanfiction(type); + break; + case MANGAFOX: + // TODO + break; + case E621: + // TODO + break; + case YIFFSTAR: + // TODO + break; + case E_HENTAI: + // TODO + break; + case MANGA_LEL: + // TODO + break; + case CBZ: + case HTML: + case INFO_TEXT: + case TEXT: + case EPUB: + break; + } + + return support; + } +} diff --git a/src/be/nikiroo/fanfix/searchable/Fanfiction.java b/src/be/nikiroo/fanfix/searchable/Fanfiction.java new file mode 100644 index 0000000..bcc4759 --- /dev/null +++ b/src/be/nikiroo/fanfix/searchable/Fanfiction.java @@ -0,0 +1,416 @@ +package be.nikiroo.fanfix.searchable; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.net.URLEncoder; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.bundles.StringId; +import be.nikiroo.fanfix.data.MetaData; +import be.nikiroo.fanfix.supported.SupportType; +import be.nikiroo.utils.Image; +import be.nikiroo.utils.StringUtils; + +/** + * A {@link BasicSearchable} for Fanfiction.NET. + * + * @author niki + */ +class Fanfiction extends BasicSearchable { + static private String BASE_URL = "http://fanfiction.net/"; + + /** + * Create a new {@link Fanfiction}. 
+ * + * @param type + * {@link SupportType#FANFICTION} + */ + public Fanfiction(SupportType type) { + super(type); + } + + @Override + public List getTags() throws IOException { + String storiesName = null; + String crossoversName = null; + Map stories = new HashMap(); + Map crossovers = new HashMap(); + + Document mainPage = load(BASE_URL, true); + Element menu = mainPage.getElementsByClass("dropdown").first(); + if (menu != null) { + Element ul = menu.getElementsByClass("dropdown-menu").first(); + if (ul != null) { + Map currentList = null; + for (Element li : ul.getElementsByTag("li")) { + if (li.hasClass("disabled")) { + if (storiesName == null) { + storiesName = li.text(); + currentList = stories; + } else { + crossoversName = li.text(); + currentList = crossovers; + } + } else if (currentList != null) { + Element a = li.getElementsByTag("a").first(); + if (a != null) { + currentList.put(a.absUrl("href"), a.text()); + } + } + } + } + } + + List tags = new ArrayList(); + + if (storiesName != null) { + SearchableTag tag = new SearchableTag(null, storiesName, false); + for (String id : stories.keySet()) { + tag.add(new SearchableTag(id, stories.get(id), true, false)); + } + tags.add(tag); + } + + if (crossoversName != null) { + SearchableTag tag = new SearchableTag(null, crossoversName, false); + for (String id : crossovers.keySet()) { + tag.add(new SearchableTag(id, crossovers.get(id), false, false)); + } + tags.add(tag); + } + + return tags; + } + + @Override + protected void fillTag(SearchableTag tag) throws IOException { + if (tag.getId() == null || tag.isComplete()) { + return; + } + + Document doc = load(tag.getId(), false); + Element list = doc.getElementById("list_output"); + if (list != null) { + Element table = list.getElementsByTag("table").first(); + if (table != null) { + for (Element div : table.getElementsByTag("div")) { + Element a = div.getElementsByTag("a").first(); + Element span = div.getElementsByTag("span").first(); + + if (a != null) { + 
String subid = a.absUrl("href"); + boolean crossoverSubtag = subid + .contains("/crossovers/"); + + SearchableTag subtag = new SearchableTag(subid, + a.text(), !crossoverSubtag, !crossoverSubtag); + + tag.add(subtag); + if (span != null) { + String nr = span.text(); + if (nr.startsWith("(")) { + nr = nr.substring(1); + } + if (nr.endsWith(")")) { + nr = nr.substring(0, nr.length() - 1); + } + nr = nr.trim(); + subtag.setCount(toNumber(nr)); + } + } + } + } + } + + tag.setComplete(true); + } + + /** + * @deprecated use {@link StringUtils} when updated + */ + @Deprecated + private static long toNumber(String value) { + // TODO: use StringUtils instead after update + long count = 0l; + if (value != null) { + try { + if (value.toLowerCase().endsWith("m")) { + count = Long.parseLong(value.substring(0, + value.length() - 1).trim()); + count *= 1000000; + } else if (value.toLowerCase().endsWith("k")) { + count = Long.parseLong(value.substring(0, + value.length() - 1).trim()); + count *= 1000; + } else { + count = Long.parseLong(value); + } + } catch (NumberFormatException pe) { + } + } + + return count; + } + + @Override + public List search(String search) throws IOException { + String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8"); + return getStories(BASE_URL + "search/?ready=1&type=story&keywords=" + + encoded, null, null); + } + + @Override + public List search(SearchableTag tag, int page) + throws IOException { + List metas = new ArrayList(); + + String url = tag.getId(); + if (url != null) { + if (page > 1) { + int pos = url.indexOf("&p="); + if (pos >= 0) { + url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page + + "$2"); + } else { + url += "&p=" + page; + } + } + + Document doc = load(url, false); + + // Update the pages number if needed + if (tag.getPages() < 0) { + tag.setPages(getPages(doc)); + } + + // Find out the full subjects (including parents) + String subjects = ""; + for (SearchableTag t = tag; t != null; t = t.getParent()) { + if 
(!subjects.isEmpty()) { + subjects += ", "; + } + subjects += t.getName(); + } + + metas = getStories(url, doc, subjects); + } + + return metas; + } + + /** + * Return the number of pages in this stories result listing. + * + * @param doc + * the document + * + * @return the number of pages or -1 if unknown + * + * @throws IOException + * in case of I/O errors + */ + private int getPages(Document doc) throws IOException { + int pages = -1; + + if (doc != null) { + Element center = doc.getElementsByTag("center").first(); + if (center != null) { + for (Element a : center.getElementsByTag("a")) { + if (a.absUrl("href").contains("&p=")) { + int thisLinkPages = -1; + try { + String[] tab = a.absUrl("href").split("="); + tab = tab[tab.length - 1].split("&"); + thisLinkPages = Integer + .parseInt(tab[tab.length - 1]); + } catch (Exception e) { + } + + pages = Math.max(pages, thisLinkPages); + } + } + } + } + + return pages; + } + + /** + * Fetch the stories from the given page. + * + * @param sourceUrl + * the url of the document + * @param doc + * the document to use (if NULL, will be loaded from + * sourceUrl) + * @param mainSubject + * the main subject (the anime/book/movie item related to the + * stories, like "MLP" or "Doctor Who"), or NULL if none + * + * @return the stories found in it + * + * @throws IOException + * in case of I/O errors + */ + private List getStories(String sourceUrl, Document doc, + String mainSubject) throws IOException { + List metas = new ArrayList(); + + if (doc == null) { + doc = load(sourceUrl, false); + } + + for (Element story : doc.getElementsByClass("z-list")) { + MetaData meta = new MetaData(); + meta.setImageDocument(false); + meta.setSource(getType().getSourceName()); + + // Title, URL, Cover + Element stitle = story.getElementsByClass("stitle").first(); + if (stitle != null) { + meta.setTitle(stitle.text()); + meta.setUrl(stitle.absUrl("href")); + Element cover = stitle.getElementsByTag("img").first(); + if (cover != null) { + // 
note: see data-original if needed? + String coverUrl = cover.absUrl("src"); + + try { + InputStream in = Instance.getCache().open( + new URL(coverUrl), getSupport(), true); + try { + meta.setCover(new Image(in)); + } finally { + in.close(); + } + } catch (Exception e) { + Instance.getTraceHandler() + .error(new Exception( + "Cannot download cover for Fanfiction story in search mode", + e)); + } + } + } + + // Author + Elements as = story.getElementsByTag("a"); + if (as.size() > 1) { + meta.setAuthor(as.get(1).text()); + } + + // Tags (concatenated text), published date, updated date, Resume + String tags = ""; + List tagList = new ArrayList(); + Elements divs = story.getElementsByTag("div"); + if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) { + String resume = divs.get(1).text(); + if (divs.size() > 2) { + tags = divs.get(2).text(); + resume = resume.substring(0, + resume.length() - tags.length()).trim(); + + for (Element d : divs.get(2).getElementsByAttribute( + "data-xutime")) { + String secs = d.attr("data-xutime"); + try { + String date = new SimpleDateFormat("yyyy-MM-dd") + .format(new Date( + Long.parseLong(secs) * 1000)); + // (updated, ) published + if (meta.getDate() != null) { + tagList.add("Updated: " + meta.getDate()); + } + meta.setDate(date); + } catch (Exception e) { + } + } + } + + meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0, + Instance.getTrans().getString(StringId.DESCRIPTION), + resume)); + } + + // How are the tags ordered? + // We have "Rated: xx", then the language, then all other tags + // If the subject(s) is/are present, they are before "Rated: xx" + + // //////////// + // Examples: // + // //////////// + + // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters: + // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.] 
+ + // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters: + // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7, + // Published: 4/2] + + // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance, + // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1, + // Published: 9/1/2016] + + boolean rated = false; + boolean isLang = false; + String subject = mainSubject == null ? "" : mainSubject; + String[] tab = tags.split(" *- *"); + for (int i = 0; i < tab.length; i++) { + String tag = tab[i]; + if (tag.startsWith("Rated: ")) { + rated = true; + } + + if (!rated) { + if (!subject.isEmpty()) { + subject += ", "; + } + subject += tag; + } else if (isLang) { + meta.setLang(tag); + isLang = false; + } else { + if (tag.contains(":")) { + // Handle special tags: + if (tag.startsWith("Words: ")) { + try { + meta.setWords(Long.parseLong(tag + .substring("Words: ".length()) + .replace(",", "").trim())); + } catch (Exception e) { + } + } else if (tag.startsWith("Rated: ")) { + tagList.add(tag); + } + } else { + // Normal tags are "/"-separated + for (String t : tag.split("/")) { + tagList.add(t); + } + } + + if (tag.startsWith("Rated: ")) { + isLang = true; + } + } + } + + meta.setSubject(subject); + meta.setTags(tagList); + + metas.add(meta); + } + + return metas; + } +} diff --git a/src/be/nikiroo/fanfix/searchable/SearchableTag.java b/src/be/nikiroo/fanfix/searchable/SearchableTag.java new file mode 100644 index 0000000..c12b3c6 --- /dev/null +++ b/src/be/nikiroo/fanfix/searchable/SearchableTag.java @@ -0,0 +1,266 @@ +package be.nikiroo.fanfix.searchable; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class represents a tag that can be searched on a supported website. 
+ * + * @author niki + */ +public class SearchableTag { + private String id; + private String name; + private boolean complete; + private long count; + + private SearchableTag parent; + private List children; + + /** + * The number of stories result pages this tag can get. + *

+ * We keep more information than what the getter/setter returns/accepts. + *

+ */ + private int pages; + + /** + * Create a new {@link SearchableTag}. + *

+ * Note that tags are complete by default. + * + * @param id + * the ID (usually a way to find the linked stories later on) + * @param name + * the tag name, which can be displayed to the user + * @param leaf + * the tag is a leaf tag, that is, it will not return subtags + * with {@link BasicSearchable#fillTag(SearchableTag)} but will + * return stories with + * {@link BasicSearchable#search(SearchableTag)} + */ + public SearchableTag(String id, String name, boolean leaf) { + this(id, name, leaf, true); + } + + /** + * Create a new {@link SearchableTag}. + * + * @param id + * the ID (usually a way to find the linked stories later on) + * @param name + * the tag name, which can be displayed to the user + * @param leaf + * the tag is a leaf tag, that is, it will not return subtags + * with {@link BasicSearchable#fillTag(SearchableTag)} but will + * return stories with + * {@link BasicSearchable#search(SearchableTag)} + * @param complete + * the tag {@link SearchableTag#isComplete()} or not + */ + public SearchableTag(String id, String name, boolean leaf, boolean complete) { + this.id = id; + this.name = name; + this.complete = complete; + + setLeaf(leaf); + + children = new ArrayList(); + } + + /** + * The ID (usually a way to find the linked stories later on). + * + * @return the ID + */ + public String getId() { + return id; + } + + /** + * The tag name, which can be displayed to the user. + * + * @return then name + */ + public String getName() { + return name; + } + + /** + * Non-complete, non-leaf tags can still be completed via a + * {@link BasicSearchable#fillTag(SearchableTag)} operation from a + * {@link BasicSearchable}, in order to gain (more?) subtag children. + *

+ * This method does not make sense for leaf tags. + * + * @return TRUE if it is complete + */ + public boolean isComplete() { + return complete; + } + + /** + * Non-complete, non-leaf tags can still be completed via a + * {@link BasicSearchable#fillTag(SearchableTag)} operation from a + * {@link BasicSearchable}, in order to gain (more?) subtag children. + *

+ * This method does not make sense for leaf tags. + * + * @param complete + * TRUE if it is complete + */ + public void setComplete(boolean complete) { + this.complete = complete; + } + + /** + * The number of items that can be found with this tag if it is searched. + *

+ * Will report the number of subtags by default. + * + * @return the number of items + */ + public long getCount() { + long count = this.count; + if (count <= 0) { + count = children.size(); + } + + return count; + } + + /** + * The number of items that can be found with this tag if it is searched. + * + * @param count + * the new count + */ + public void setCount(long count) { + this.count = count; + } + + /** + * The number of stories result pages this tag contains, only make sense if + * {@link SearchableTag#isLeaf()} returns TRUE. + *

+ * Will return -1 if the number is not yet known. + * + * @return the number of pages, or -1 + */ + public int getPages() { + return Math.max(-1, pages); + } + + /** + * The number of stories result pages this tag contains, only make sense if + * {@link SearchableTag#isLeaf()} returns TRUE. + * + * @param pages + * the (positive or 0) number of pages + */ + public void setPages(int pages) { + this.pages = Math.max(-1, pages); + } + + /** + * This tag is a leaf tag, that is, it will not return other subtags with + * {@link BasicSearchable#fillTag(SearchableTag)} but will return stories + * with {@link BasicSearchable#search(SearchableTag)}. + * + * @return TRUE if it is + */ + public boolean isLeaf() { + return pages > -2; + } + + /** + * This tag is a leaf tag, that is, it will not return other subtags with + * {@link BasicSearchable#fillTag(SearchableTag)} but will return stories + * with {@link BasicSearchable#search(SearchableTag)}. + *

+ * Will reset the number of pages to -1. + * + * @param leaf + * TRUE if it is + */ + public void setLeaf(boolean leaf) { + pages = leaf ? -1 : -2; + } + + /** + * The subtag children of this {@link SearchableTag}. + *

+ * Never NULL. + *

+ * Note that if {@link SearchableTag#isComplete()} returns false, you can + * still fill (more?) subtag children with a {@link BasicSearchable}. + * + * @return the subtag children, never NULL + */ + public List getChildren() { + return children; + } + + /** + * Add the given {@link SearchableTag} as a subtag child. + * + * @param tag + * the tag to add + */ + public void add(SearchableTag tag) { + children.add(tag); + tag.parent = this; + } + + /** + * This {@link SearchableTag} parent tag, or NULL if none. + * + * @return the parent or NULL + */ + public SearchableTag getParent() { + return parent; + } + + /** + * Display a DEBUG {@link String} representation of this object. + */ + @Override + public String toString() { + String rep = name + " [" + id + "]"; + if (!complete) { + rep += "*"; + } + + if (getCount() > 0) { + rep += " (" + getCount() + ")"; + } + + if (!children.isEmpty()) { + String tags = ""; + int i = 1; + for (SearchableTag tag : children) { + if (!tags.isEmpty()) { + tags += ", "; + } + + if (i > 10) { + tags += "..."; + break; + } + + tags += tag; + i++; + } + + rep += ": " + tags; + } + + return rep; + } +}