Fanfiction step2 + SearchableTags
authorNiki Roo <niki@nikiroo.be>
Wed, 10 Apr 2019 08:16:09 +0000 (10:16 +0200)
committerNiki Roo <niki@nikiroo.be>
Wed, 10 Apr 2019 08:16:09 +0000 (10:16 +0200)
src/be/nikiroo/fanfix/searchable/BasicSearchable.java
src/be/nikiroo/fanfix/searchable/Fanfiction.java
src/be/nikiroo/fanfix/searchable/SearchableTag.java

index 25c388a8e268aa1bde8a26f84c4d10a2721be070..ecc1da1ef3393f2b8144c8a613b56c8343c3a458 100644 (file)
@@ -128,13 +128,16 @@ public abstract class BasicSearchable {
         * 
         * @param url
         *            the URL to load
+        * @param stable
+        *            TRUE for more stable resources, FALSE when they often change
+        * 
         * @return the document
         * 
         * @throws IOException
         *             in case of I/O error
         */
-       protected Document load(String url) throws IOException {
-               return load(new URL(url));
+       protected Document load(String url, boolean stable) throws IOException {
+               return load(new URL(url), stable);
        }
 
        /**
@@ -142,13 +145,16 @@ public abstract class BasicSearchable {
         * 
         * @param url
         *            the URL to load
+        * @param stable
+        *            TRUE for more stable resources, FALSE when they often change
+        * 
         * @return the document
         * 
         * @throws IOException
         *             in case of I/O error
         */
-       protected Document load(URL url) throws IOException {
-               return DataUtil.load(Instance.getCache().open(url, support, false),
+       protected Document load(URL url, boolean stable) throws IOException {
+               return DataUtil.load(Instance.getCache().open(url, support, stable),
                                "UTF-8", url.toString());
        }
 
index f178a815fc8b8676402a92da21236d0c3c60f17f..e8fbf4db9683e70d58197c870c34ce5f43fa8c1e 100644 (file)
@@ -23,6 +23,8 @@ import be.nikiroo.fanfix.supported.SupportType;
  * @author niki
  */
 class Fanfiction extends BasicSearchable {
+       static private String BASE_URL = "http://fanfiction.net/";
+
        /**
         * Create a new {@link Fanfiction}.
         * 
@@ -40,7 +42,7 @@ class Fanfiction extends BasicSearchable {
                Map<String, String> stories = new HashMap<String, String>();
                Map<String, String> crossovers = new HashMap<String, String>();
 
-               Document mainPage = load("http://fanfiction.net/");
+               Document mainPage = load(BASE_URL, true);
                Element menu = mainPage.getElementsByClass("dropdown").first();
                if (menu != null) {
                        Element ul = menu.getElementsByClass("dropdown-menu").first();
@@ -68,17 +70,17 @@ class Fanfiction extends BasicSearchable {
                List<SearchableTag> tags = new ArrayList<SearchableTag>();
 
                if (storiesName != null) {
-                       SearchableTag tag = new SearchableTag(null, storiesName, true);
+                       SearchableTag tag = new SearchableTag(null, storiesName, false);
                        for (String id : stories.keySet()) {
-                               tag.add(new SearchableTag(id, stories.get(id), false));
+                               tag.add(new SearchableTag(id, stories.get(id), true, false));
                        }
                        tags.add(tag);
                }
 
                if (crossoversName != null) {
-                       SearchableTag tag = new SearchableTag(null, crossoversName, true);
+                       SearchableTag tag = new SearchableTag(null, crossoversName, false);
                        for (String id : crossovers.keySet()) {
-                               tag.add(new SearchableTag(id, crossovers.get(id), false));
+                               tag.add(new SearchableTag(id, crossovers.get(id), false, false));
                        }
                        tags.add(tag);
                }
@@ -92,9 +94,9 @@ class Fanfiction extends BasicSearchable {
                        return;
                }
 
-               boolean subtagIsComplete = !tag.getId().contains("/crossovers/");
+               boolean subtagIsLeaf = !tag.getId().contains("/crossovers/");
 
-               Document doc = load(tag.getId());
+               Document doc = load(tag.getId(), false);
                Element list = doc.getElementById("list_output");
                if (list != null) {
                        Element table = list.getElementsByTag("table").first();
@@ -105,7 +107,7 @@ class Fanfiction extends BasicSearchable {
 
                                        if (a != null) {
                                                SearchableTag subtag = new SearchableTag(
-                                                               a.absUrl("href"), a.text(), subtagIsComplete);
+                                                               a.absUrl("href"), a.text(), subtagIsLeaf);
                                                tag.add(subtag);
                                                if (span != null) {
                                                        String nr = span.text();
@@ -145,7 +147,7 @@ class Fanfiction extends BasicSearchable {
 
        @Override
        public List<MetaData> search(String search) throws IOException {
-               // TODO Auto-generated method stub
+               // TODO /search/?reader=1&type=story&keywords=blablablab
                return null;
        }
 
@@ -154,7 +156,29 @@ class Fanfiction extends BasicSearchable {
                List<MetaData> metas = new ArrayList<MetaData>();
 
                if (tag.getId() != null) {
-                       Document doc = load(tag.getId());
+                       Document doc = load(tag.getId(), false);
+
+                       Element center = doc.getElementsByTag("center").first();
+                       if (center != null) {
+                               int pages = -1;
+                               for (Element a : center.getElementsByTag("a")) {
+                                       if (a.absUrl("href").contains("&p=")) {
+                                               int thisLinkPages = -1;
+                                               try {
+                                                       String[] tab = a.absUrl("href").split("=");
+                                                       tab = tab[tab.length - 1].split("&");
+                                                       thisLinkPages = Integer
+                                                                       .parseInt(tab[tab.length - 1]);
+                                               } catch (Exception e) {
+                                               }
+
+                                               pages = Math.max(pages, thisLinkPages);
+                                       }
+                               }
+
+                               tag.setPages(pages);
+                       }
+
                        for (Element story : doc.getElementsByClass("z-list")) {
                                String title = "";
                                String url = "";
@@ -199,6 +223,7 @@ class Fanfiction extends BasicSearchable {
                                                0, Instance.getTrans().getString(StringId.DESCRIPTION),
                                                resume));
                                meta.setSource(getType().getSourceName());
+                               // TODO: remove tags to interpret them instead (lang, words..)
                                meta.setTags(Arrays.asList(tags.split(" *- *")));
                                meta.setTitle(title);
                                meta.setUrl(url);
@@ -227,6 +252,7 @@ class Fanfiction extends BasicSearchable {
                System.out.println(cmlp);
 
                List<MetaData> metas = f.search(mlp);
-               System.out.println(metas);
+               System.out.println(mlp.getPages());
+               //System.out.println(metas);
        }
 }
index c877bd764f7e90a4c2aacebbd5f5bb717bba2a29..af1ce0f1a50c1571a4a2841c9733ad32998445e6 100644 (file)
@@ -15,50 +15,103 @@ public class SearchableTag {
        private long count;
        private List<SearchableTag> children;
 
+       /**
+        * The number of stories result pages this tag can get.
+        * <p>
+        * We keep more information than what the getter/setter returns/accepts.
+        * <ul>
+        * <li>-2: this tag does not support stories results (not a leaf tag)</li>
+        * <li>-1: the number is not yet known, but will be known after a
+        * {@link BasicSearchable#fillTag(SearchableTag)} operation</li>
+        * <li>X: the number of pages</li>
+        * </ul>
+        */
+       private int pages;
+
        /**
         * Create a new {@link SearchableTag}.
+        * <p>
+        * Note that tags are complete by default.
         * 
         * @param id
         *            the ID (usually a way to find the linked stories later on)
         * @param name
         *            the tag name, which can be displayed to the user
+        * @param leaf
+        *            the tag is a leaf tag, that is, it will not return subtags
+        *            with {@link BasicSearchable#fillTag(SearchableTag)} but will
+        *            return stories with
+        *            {@link BasicSearchable#search(SearchableTag)}
+        */
+       public SearchableTag(String id, String name, boolean leaf) {
+               this(id, name, leaf, true);
+       }
+
+       /**
+        * Create a new {@link SearchableTag}.
+        * 
+        * @param id
+        *            the ID (usually a way to find the linked stories later on)
+        * @param name
+        *            the tag name, which can be displayed to the user
+        * @param leaf
+        *            the tag is a leaf tag, that is, it will not return subtags
+        *            with {@link BasicSearchable#fillTag(SearchableTag)} but will
+        *            return stories with
+        *            {@link BasicSearchable#search(SearchableTag)}
         * @param complete
-        *            TRUE for a {@link SearchableTag} that cannot be "filled" by
-        *            the {@link BasicSearchable} in order to get (more?) subtag
-        *            children
+        *            TRUE if the tag is complete (see {@link SearchableTag#isComplete()})
         */
-       public SearchableTag(String id, String name, boolean complete) {
+       public SearchableTag(String id, String name, boolean leaf, boolean complete) {
                this.id = id;
                this.name = name;
                this.complete = complete;
 
+               setLeaf(leaf);
+
                children = new ArrayList<SearchableTag>();
        }
 
+       /**
+        * The ID (usually a way to find the linked stories later on).
+        * 
+        * @return the ID
+        */
        public String getId() {
                return id;
        }
 
+       /**
+        * The tag name, which can be displayed to the user.
+        * 
+        * @return the name
+        */
        public String getName() {
                return name;
        }
 
        /**
-        * This tag can still be completed via a "fill" tag operation from a
+        * Non-complete, non-leaf tags can still be completed via a
+        * {@link BasicSearchable#fillTag(SearchableTag)} operation from a
         * {@link BasicSearchable}, in order to gain (more?) subtag children.
+        * <p>
+        * This method does not make sense for leaf tags.
         * 
-        * @return TRUE if it can
+        * @return TRUE if it is complete
         */
        public boolean isComplete() {
                return complete;
        }
 
        /**
-        * This tag can still be completed via a "fill" tag operation from a
+        * Non-complete, non-leaf tags can still be completed via a
+        * {@link BasicSearchable#fillTag(SearchableTag)} operation from a
         * {@link BasicSearchable}, in order to gain (more?) subtag children.
+        * <p>
+        * This method does not make sense for leaf tags.
         * 
         * @param complete
-        *            TRUE if it can
+        *            TRUE if it is complete
         */
        public void setComplete(boolean complete) {
                this.complete = complete;
@@ -115,6 +168,54 @@ public class SearchableTag {
                this.count = count;
        }
 
+       /**
+        * The number of stories result pages this tag contains, only makes sense if
+        * {@link SearchableTag#isLeaf()} returns TRUE.
+        * <p>
+        * Will return -1 if the number is not yet known.
+        * 
+        * @return the number of pages, or -1
+        */
+       public int getPages() {
+               return Math.max(-1, pages);
+       }
+
+       /**
+        * The number of stories result pages this tag contains, only makes sense if
+        * {@link SearchableTag#isLeaf()} returns TRUE.
+        * 
+        * @param pages
+        *            the (positive or 0) number of pages
+        */
+       public void setPages(int pages) {
+               this.pages = Math.max(-1, pages);
+       }
+
+       /**
+        * This tag is a leaf tag, that is, it will not return other subtags with
+        * {@link BasicSearchable#fillTag(SearchableTag)} but will return stories
+        * with {@link BasicSearchable#search(SearchableTag)}.
+        * 
+        * @return TRUE if it is
+        */
+       public boolean isLeaf() {
+               return pages > -2;
+       }
+
+       /**
+        * This tag is a leaf tag, that is, it will not return other subtags with
+        * {@link BasicSearchable#fillTag(SearchableTag)} but will return stories
+        * with {@link BasicSearchable#search(SearchableTag)}.
+        * <p>
+        * Will reset the number of pages to -1.
+        * 
+        * @param leaf
+        *            TRUE if it is
+        */
+       public void setLeaf(boolean leaf) {
+               pages = leaf ? -1 : -2;
+       }
+
        /**
         * The subtag children of this {@link SearchableTag}.
         * <p>