From 76ec935e19dbd00dfbcaaeabfc187125f727b5ac Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Wed, 10 Apr 2019 10:16:09 +0200 Subject: [PATCH] Fanfiction step2 + SearchableTags --- .../fanfix/searchable/BasicSearchable.java | 14 ++- .../nikiroo/fanfix/searchable/Fanfiction.java | 48 +++++-- .../fanfix/searchable/SearchableTag.java | 117 ++++++++++++++++-- 3 files changed, 156 insertions(+), 23 deletions(-) diff --git a/src/be/nikiroo/fanfix/searchable/BasicSearchable.java b/src/be/nikiroo/fanfix/searchable/BasicSearchable.java index 25c388a..ecc1da1 100644 --- a/src/be/nikiroo/fanfix/searchable/BasicSearchable.java +++ b/src/be/nikiroo/fanfix/searchable/BasicSearchable.java @@ -128,13 +128,16 @@ public abstract class BasicSearchable { * * @param url * the URL to load + * @param stable + * TRUE for more stable resources, FALSE when they often change + * * @return the document * * @throws IOException * in case of I/O error */ - protected Document load(String url) throws IOException { - return load(new URL(url)); + protected Document load(String url, boolean stable) throws IOException { + return load(new URL(url), stable); } /** @@ -142,13 +145,16 @@ public abstract class BasicSearchable { * * @param url * the URL to load + * @param stable + * TRUE for more stable resources, FALSE when they often change + * * @return the document * * @throws IOException * in case of I/O error */ - protected Document load(URL url) throws IOException { - return DataUtil.load(Instance.getCache().open(url, support, false), + protected Document load(URL url, boolean stable) throws IOException { + return DataUtil.load(Instance.getCache().open(url, support, stable), "UTF-8", url.toString()); } diff --git a/src/be/nikiroo/fanfix/searchable/Fanfiction.java b/src/be/nikiroo/fanfix/searchable/Fanfiction.java index f178a81..e8fbf4d 100644 --- a/src/be/nikiroo/fanfix/searchable/Fanfiction.java +++ b/src/be/nikiroo/fanfix/searchable/Fanfiction.java @@ -23,6 +23,8 @@ import 
be.nikiroo.fanfix.supported.SupportType; * @author niki */ class Fanfiction extends BasicSearchable { + static private String BASE_URL = "http://fanfiction.net/"; + /** * Create a new {@link Fanfiction}. * @@ -40,7 +42,7 @@ class Fanfiction extends BasicSearchable { Map stories = new HashMap(); Map crossovers = new HashMap(); - Document mainPage = load("http://fanfiction.net/"); + Document mainPage = load(BASE_URL, true); Element menu = mainPage.getElementsByClass("dropdown").first(); if (menu != null) { Element ul = menu.getElementsByClass("dropdown-menu").first(); @@ -68,17 +70,17 @@ class Fanfiction extends BasicSearchable { List tags = new ArrayList(); if (storiesName != null) { - SearchableTag tag = new SearchableTag(null, storiesName, true); + SearchableTag tag = new SearchableTag(null, storiesName, false); for (String id : stories.keySet()) { - tag.add(new SearchableTag(id, stories.get(id), false)); + tag.add(new SearchableTag(id, stories.get(id), true, false)); } tags.add(tag); } if (crossoversName != null) { - SearchableTag tag = new SearchableTag(null, crossoversName, true); + SearchableTag tag = new SearchableTag(null, crossoversName, false); for (String id : crossovers.keySet()) { - tag.add(new SearchableTag(id, crossovers.get(id), false)); + tag.add(new SearchableTag(id, crossovers.get(id), false, false)); } tags.add(tag); } @@ -92,9 +94,9 @@ class Fanfiction extends BasicSearchable { return; } - boolean subtagIsComplete = !tag.getId().contains("/crossovers/"); + boolean subtagIsLeaf = !tag.getId().contains("/crossovers/"); - Document doc = load(tag.getId()); + Document doc = load(tag.getId(), false); Element list = doc.getElementById("list_output"); if (list != null) { Element table = list.getElementsByTag("table").first(); @@ -105,7 +107,7 @@ class Fanfiction extends BasicSearchable { if (a != null) { SearchableTag subtag = new SearchableTag( - a.absUrl("href"), a.text(), subtagIsComplete); + a.absUrl("href"), a.text(), subtagIsLeaf); 
tag.add(subtag); if (span != null) { String nr = span.text(); @@ -145,7 +147,7 @@ class Fanfiction extends BasicSearchable { @Override public List search(String search) throws IOException { - // TODO Auto-generated method stub + // TODO /search/?reader=1&type=story&keywords=blablablab return null; } @@ -154,7 +156,29 @@ class Fanfiction extends BasicSearchable { List metas = new ArrayList(); if (tag.getId() != null) { - Document doc = load(tag.getId()); + Document doc = load(tag.getId(), false); + + Element center = doc.getElementsByTag("center").first(); + if (center != null) { + int pages = -1; + for (Element a : center.getElementsByTag("a")) { + if (a.absUrl("href").contains("&p=")) { + int thisLinkPages = -1; + try { + String[] tab = a.absUrl("href").split("="); + tab = tab[tab.length - 1].split("&"); + thisLinkPages = Integer + .parseInt(tab[tab.length - 1]); + } catch (Exception e) { + } + + pages = Math.max(pages, thisLinkPages); + } + } + + tag.setPages(pages); + } + for (Element story : doc.getElementsByClass("z-list")) { String title = ""; String url = ""; @@ -199,6 +223,7 @@ class Fanfiction extends BasicSearchable { 0, Instance.getTrans().getString(StringId.DESCRIPTION), resume)); meta.setSource(getType().getSourceName()); + // TODO: remove tags to interpret them instead (lang, words..) 
meta.setTags(Arrays.asList(tags.split(" *- *"))); meta.setTitle(title); meta.setUrl(url); @@ -227,6 +252,7 @@ class Fanfiction extends BasicSearchable { System.out.println(cmlp); List metas = f.search(mlp); - System.out.println(metas); + System.out.println(mlp.getPages()); + //System.out.println(metas); } } diff --git a/src/be/nikiroo/fanfix/searchable/SearchableTag.java b/src/be/nikiroo/fanfix/searchable/SearchableTag.java index c877bd7..af1ce0f 100644 --- a/src/be/nikiroo/fanfix/searchable/SearchableTag.java +++ b/src/be/nikiroo/fanfix/searchable/SearchableTag.java @@ -15,50 +15,103 @@ public class SearchableTag { private long count; private List children; + /** + * The number of stories result pages this tag can get. + *
<p>
+ * We keep more information than what the getter/setter returns/accepts. + *

<ul> + * <li>-2: this tag does not support stories results (not a leaf tag)</li>
+ * <li>-1: the number is not yet known, but will be known after a + * {@link BasicSearchable#fillTag(SearchableTag)} operation</li>
+ * <li>X: the number of pages</li>
+ * </ul>
+ */ + private int pages; + /** * Create a new {@link SearchableTag}. + *
<p>
+ * Note that tags are complete by default. * * @param id * the ID (usually a way to find the linked stories later on) * @param name * the tag name, which can be displayed to the user + * @param leaf + * the tag is a leaf tag, that is, it will not return subtags + * with {@link BasicSearchable#fillTag(SearchableTag)} but will + * return stories with + * {@link BasicSearchable#search(SearchableTag)} + */ + public SearchableTag(String id, String name, boolean leaf) { + this(id, name, leaf, true); + } + + /** + * Create a new {@link SearchableTag}. + * + * @param id + * the ID (usually a way to find the linked stories later on) + * @param name + * the tag name, which can be displayed to the user + * @param leaf + * the tag is a leaf tag, that is, it will not return subtags + * with {@link BasicSearchable#fillTag(SearchableTag)} but will + * return stories with + * {@link BasicSearchable#search(SearchableTag)} * @param complete - * TRUE for a {@link SearchableTag} that cannot be "filled" by - * the {@link BasicSearchable} in order to get (more?) subtag - * children + * the tag {@link SearchableTag#isComplete()} or not */ - public SearchableTag(String id, String name, boolean complete) { + public SearchableTag(String id, String name, boolean leaf, boolean complete) { this.id = id; this.name = name; this.complete = complete; + setLeaf(leaf); + children = new ArrayList(); } + /** + * The ID (usually a way to find the linked stories later on). + * + * @return the ID + */ public String getId() { return id; } + /** + * The tag name, which can be displayed to the user. + * + * @return then name + */ public String getName() { return name; } /** - * This tag can still be completed via a "fill" tag operation from a + * Non-complete, non-leaf tags can still be completed via a + * {@link BasicSearchable#fillTag(SearchableTag)} operation from a * {@link BasicSearchable}, in order to gain (more?) subtag children. + *
<p>
+ * This method does not make sense for leaf tags. * - * @return TRUE if it can + * @return TRUE if it is complete */ public boolean isComplete() { return complete; } /** - * This tag can still be completed via a "fill" tag operation from a + * Non-complete, non-leaf tags can still be completed via a + * {@link BasicSearchable#fillTag(SearchableTag)} operation from a * {@link BasicSearchable}, in order to gain (more?) subtag children. + *
<p>
+ * This method does not make sense for leaf tags. * * @param complete - * TRUE if it can + * TRUE if it is complete */ public void setComplete(boolean complete) { this.complete = complete; @@ -115,6 +168,54 @@ public class SearchableTag { this.count = count; } + /** + * The number of stories result pages this tag contains, only make sense if + * {@link SearchableTag#isLeaf()} returns TRUE. + *
<p>
+ * Will return -1 if the number is not yet known. + * + * @return the number of pages, or -1 + */ + public int getPages() { + return Math.max(-1, pages); + } + + /** + * The number of stories result pages this tag contains, only make sense if + * {@link SearchableTag#isLeaf()} returns TRUE. + * + * @param pages + * the (positive or 0) number of pages + */ + public void setPages(int pages) { + this.pages = Math.max(-1, pages); + } + + /** + * This tag is a leaf tag, that is, it will not return other subtags with + * {@link BasicSearchable#fillTag(SearchableTag)} but will return stories + * with {@link BasicSearchable#search(SearchableTag)}. + * + * @return TRUE if it is + */ + public boolean isLeaf() { + return pages > -2; + } + + /** + * This tag is a leaf tag, that is, it will not return other subtags with + * {@link BasicSearchable#fillTag(SearchableTag)} but will return stories + * with {@link BasicSearchable#search(SearchableTag)}. + *
<p>
+ * Will reset the number of pages to -1. + * + * @param leaf + * TRUE if it is + */ + public void setLeaf(boolean leaf) { + pages = leaf ? -1 : -2; + } + /** * The subtag children of this {@link SearchableTag}. *
<p>
-- 2.27.0