search Fanfiction: done
authorNiki Roo <niki@nikiroo.be>
Wed, 10 Apr 2019 18:22:16 +0000 (20:22 +0200)
committerNiki Roo <niki@nikiroo.be>
Wed, 10 Apr 2019 18:22:16 +0000 (20:22 +0200)
src/be/nikiroo/fanfix/searchable/BasicSearchable.java
src/be/nikiroo/fanfix/searchable/Fanfiction.java
src/be/nikiroo/fanfix/searchable/SearchableTag.java

index ecc1da1ef3393f2b8144c8a613b56c8343c3a458..c6394430e6e08c8b34ff49e2a0d2b819f6da47bc 100644 (file)
@@ -115,13 +115,18 @@ public abstract class BasicSearchable {
         * 
         * @param tag
         *            the tag to search for
+        * @param page
+        *            the page to use for result pagination (see
+        *            {@link SearchableTag#getPages()}, remember to check for -1),
+        *            index is 1-based
         * 
         * @return a list of stories that satisfy that search term
         * 
         * @throws IOException
         *             in case of I/O error
         */
-       abstract public List<MetaData> search(SearchableTag tag) throws IOException;
+       abstract public List<MetaData> search(SearchableTag tag, int page)
+                       throws IOException;
 
        /**
         * Load a document from its url.
index e8fbf4db9683e70d58197c870c34ce5f43fa8c1e..362b5433d7d3fc81e163a03a08273e9f2319f03f 100644 (file)
@@ -1,9 +1,12 @@
 package be.nikiroo.fanfix.searchable;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.URL;
+import java.net.URLEncoder;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.Arrays;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -16,6 +19,7 @@ import be.nikiroo.fanfix.Instance;
 import be.nikiroo.fanfix.bundles.StringId;
 import be.nikiroo.fanfix.data.MetaData;
 import be.nikiroo.fanfix.supported.SupportType;
+import be.nikiroo.utils.Image;
 
 /**
  * A {@link BasicSearchable} for Fanfiction.NET.
@@ -94,8 +98,6 @@ class Fanfiction extends BasicSearchable {
                        return;
                }
 
-               boolean subtagIsLeaf = !tag.getId().contains("/crossovers/");
-
                Document doc = load(tag.getId(), false);
                Element list = doc.getElementById("list_output");
                if (list != null) {
@@ -106,8 +108,13 @@ class Fanfiction extends BasicSearchable {
                                        Element span = div.getElementsByTag("span").first();
 
                                        if (a != null) {
-                                               SearchableTag subtag = new SearchableTag(
-                                                               a.absUrl("href"), a.text(), subtagIsLeaf);
+                                               String subid = a.absUrl("href");
+                                               boolean crossoverSubtag = subid
+                                                               .contains("/crossovers/");
+
+                                               SearchableTag subtag = new SearchableTag(subid,
+                                                               a.text(), !crossoverSubtag, !crossoverSubtag);
+
                                                tag.add(subtag);
                                                if (span != null) {
                                                        String nr = span.text();
@@ -147,20 +154,68 @@ class Fanfiction extends BasicSearchable {
 
        @Override
        public List<MetaData> search(String search) throws IOException {
-               // TODO /search/?reader=1&type=story&keywords=blablablab
-               return null;
+               String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8");
+               return getStories(
+                               "http://fanfiction.net/search/?ready=1&type=story&keywords="
+                                               + encoded, null, null);
        }
 
        @Override
-       public List<MetaData> search(SearchableTag tag) throws IOException {
+       public List<MetaData> search(SearchableTag tag, int page)
+                       throws IOException {
                List<MetaData> metas = new ArrayList<MetaData>();
 
-               if (tag.getId() != null) {
-                       Document doc = load(tag.getId(), false);
+               String url = tag.getId();
+               if (url != null) {
+                       if (page > 1) {
+                               int pos = url.indexOf("&p=");
+                               if (pos >= 0) {
+                                       url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page
+                                                       + "$2");
+                               } else {
+                                       url += "&p=" + page;
+                               }
+                       }
+
+                       Document doc = load(url, false);
+
+                       // Update the pages number if needed
+                       if (tag.getPages() < 0) {
+                               tag.setPages(getPages(doc));
+                       }
+
+                       // Find out the full subjects (including parents)
+                       String subjects = "";
+                       for (SearchableTag t = tag; t != null; t = t.getParent()) {
+                               if (!subjects.isEmpty()) {
+                                       subjects += ", ";
+                               }
+                               subjects += t.getName();
+                       }
+
+                       metas = getStories(url, doc, subjects);
+               }
+
+               return metas;
+       }
+
+       /**
+        * Return the number of pages in this story result listing.
+        * 
+        * @param doc
+        *            the document
+        * 
+        * @return the number of pages or -1 if unknown
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       private int getPages(Document doc) throws IOException {
+               int pages = -1;
 
+               if (doc != null) {
                        Element center = doc.getElementsByTag("center").first();
                        if (center != null) {
-                               int pages = -1;
                                for (Element a : center.getElementsByTag("a")) {
                                        if (a.absUrl("href").contains("&p=")) {
                                                int thisLinkPages = -1;
@@ -175,84 +230,177 @@ class Fanfiction extends BasicSearchable {
                                                pages = Math.max(pages, thisLinkPages);
                                        }
                                }
-
-                               tag.setPages(pages);
                        }
+               }
+
+               return pages;
+       }
+
+       /**
+        * Fetch the stories from the given page.
+        * 
+        * @param sourceUrl
+        *            the url of the document
+        * @param doc
+        *            the document to use (if NULL, will be loaded from
+        *            <tt>sourceUrl</tt>)
+        * @param mainSubject
+        *            the main subject (the anime/book/movie item related to the
+        *            stories, like "MLP" or "Doctor Who"), or NULL if none
+        * 
+        * @return the stories found in it
+        * 
+        * @throws IOException
+        *             in case of I/O errors
+        */
+       private List<MetaData> getStories(String sourceUrl, Document doc,
+                       String mainSubject) throws IOException {
+               List<MetaData> metas = new ArrayList<MetaData>();
+
+               if (doc == null) {
+                       doc = load(sourceUrl, false);
+               }
 
-                       for (Element story : doc.getElementsByClass("z-list")) {
-                               String title = "";
-                               String url = "";
-                               String coverUrl = "";
-
-                               Element stitle = story.getElementsByClass("stitle").first();
-                               if (stitle != null) {
-                                       title = stitle.text();
-                                       url = stitle.absUrl("href");
-                                       Element cover = stitle.getElementsByTag("img").first();
-                                       if (cover != null) {
-                                               // note: see data-original if needed?
-                                               coverUrl = cover.absUrl("src");
+               for (Element story : doc.getElementsByClass("z-list")) {
+                       MetaData meta = new MetaData();
+                       meta.setImageDocument(false);
+                       meta.setSource(getType().getSourceName());
+
+                       String subject = mainSubject == null ? "" : mainSubject;
+                       List<String> tagList = new ArrayList<String>();
+
+                       Element stitle = story.getElementsByClass("stitle").first();
+                       if (stitle != null) {
+                               meta.setTitle(stitle.text());
+                               meta.setUrl(stitle.absUrl("href"));
+                               Element cover = stitle.getElementsByTag("img").first();
+                               if (cover != null) {
+                                       // note: see data-original if needed?
+                                       String coverUrl = cover.absUrl("src");
+
+                                       try {
+                                               InputStream in = Instance.getCache().open(
+                                                               new URL(coverUrl), getSupport(), true);
+                                               try {
+                                                       meta.setCover(new Image(in));
+                                               } finally {
+                                                       in.close();
+                                               }
+                                       } catch (Exception e) {
+                                               Instance.getTraceHandler()
+                                                               .error(new Exception(
+                                                                               "Cannot download cover for Fanfiction story in search mode",
+                                                                               e));
                                        }
                                }
+                       }
 
-                               String author = "";
+                       Elements as = story.getElementsByTag("a");
+                       if (as.size() > 1) {
+                               meta.setAuthor(as.get(1).text());
+                       }
 
-                               Elements as = story.getElementsByTag("a");
-                               if (as.size() > 1) {
-                                       author = as.get(1).text();
-                               }
+                       String tags = "";
 
-                               String resume = "";
-                               String tags = "";
+                       Elements divs = story.getElementsByTag("div");
+                       if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
+                               String resume = divs.get(1).text();
+                               if (divs.size() > 2) {
+                                       tags = divs.get(2).text();
+                                       resume = resume.substring(0,
+                                                       resume.length() - tags.length()).trim();
 
-                               Elements divs = story.getElementsByTag("div");
-                               if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
-                                       resume = divs.get(1).text();
-                                       if (divs.size() > 2) {
-                                               tags = divs.get(2).text();
-                                               resume = resume.substring(0,
-                                                               resume.length() - tags.length()).trim();
+                                       for (Element d : divs.get(2).getElementsByAttribute(
+                                                       "data-xutime")) {
+                                               String secs = d.attr("data-xutime");
+                                               try {
+                                                       String date = new SimpleDateFormat("yyyy-MM-dd")
+                                                                       .format(new Date(
+                                                                                       Long.parseLong(secs) * 1000));
+                                                       // (updated, ) published
+                                                       if (meta.getDate() != null) {
+                                                               tagList.add("Updated: " + meta.getDate());
+                                                       }
+                                                       meta.setDate(date);
+                                               } catch (Exception e) {
+                                               }
                                        }
                                }
 
-                               MetaData meta = new MetaData();
-                               meta.setAuthor(author);
-                               // meta.setCover(cover); //TODO ?
-                               meta.setImageDocument(false);
-                               meta.setResume(getSupport().makeChapter(new URL(tag.getId()),
-                                               0, Instance.getTrans().getString(StringId.DESCRIPTION),
+                               meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0,
+                                               Instance.getTrans().getString(StringId.DESCRIPTION),
                                                resume));
-                               meta.setSource(getType().getSourceName());
-                               // TODO: remove tags to interpret them instead (lang, words..)
-                               meta.setTags(Arrays.asList(tags.split(" *- *")));
-                               meta.setTitle(title);
-                               meta.setUrl(url);
-
-                               metas.add(meta);
                        }
-               }
 
-               return metas;
-       }
+                       // How are the tags ordered?
+                       // We have "Rated: xx", then the language, then all other tags
+                       // If the subject(s) is/are present, they are before "Rated: xx"
+
+                       // /////////////
+                       // Examples: //
+                       // /////////////
+
+                       // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters:
+                       // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.]
+
+                       // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters:
+                       // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7,
+                       // Published:
+                       // 4/2]
+
+                       // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance,
+                       // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1,
+                       // Published:
+                       // 9/1/2016]
+
+                       boolean rated = false;
+                       boolean isLang = false;
+                       String[] tab = tags.split("  *-  *");
+                       for (int i = 0; i < tab.length; i++) {
+                               String tag = tab[i];
+                               if (tag.startsWith("Rated: ")) {
+                                       rated = true;
+                               }
 
-       public static void main(String[] args) throws IOException {
-               Fanfiction f = new Fanfiction(SupportType.FANFICTION);
+                               if (!rated) {
+                                       if (!subject.isEmpty()) {
+                                               subject += ", ";
+                                       }
+                                       subject += tag;
+                               } else if (isLang) {
+                                       meta.setLang(tag);
+                                       isLang = false;
+                               } else {
+                                       if (tag.contains(":")) {
+                                               // Handle special tags:
+                                               if (tag.startsWith("Words: ")) {
+                                                       try {
+                                                               meta.setWords(Long.parseLong(tag
+                                                                               .substring("Words: ".length())
+                                                                               .replace(",", "").trim()));
+                                                       } catch (Exception e) {
+                                                       }
+                                               } else if (tag.startsWith("Rated: ")) {
+                                                       tagList.add(tag);
+                                               }
+                                       } else {
+                                               for (String t : tag.split("/")) {
+                                                       tagList.add(t);
+                                               }
+                                       }
 
-               SearchableTag cartoons = f.getTags().get(0).getChildren().get(2);
-               f.fillTag(cartoons);
-               SearchableTag mlp = cartoons.getChildren().get(2);
-               System.out.println(mlp);
+                                       if (tag.startsWith("Rated: ")) {
+                                               isLang = true;
+                                       }
+                               }
+                       }
 
-               SearchableTag ccartoons = f.getTags().get(1).getChildren().get(0);
-               f.fillTag(ccartoons);
-               SearchableTag cmlp = ccartoons.getChildren().get(0);
-               System.out.println(cmlp);
+                       meta.setSubject(subject);
+                       meta.setTags(tagList);
 
-               f.fillTag(cmlp);
-               System.out.println(cmlp);
+                       metas.add(meta);
+               }
 
-               List<MetaData> metas = f.search(mlp);
-               System.out.println(mlp.getPages());
-               //System.out.println(metas);
+               return metas;
        }
 }
index af1ce0f1a50c1571a4a2841c9733ad32998445e6..eebbe09849520b9b681639132e24d7a946425797 100644 (file)
@@ -13,6 +13,8 @@ public class SearchableTag {
        private String name;
        private boolean complete;
        private long count;
+
+       private SearchableTag parent;
        private List<SearchableTag> children;
 
        /**
@@ -238,6 +240,16 @@ public class SearchableTag {
         */
        public void add(SearchableTag tag) {
                children.add(tag);
+               tag.parent = this;
+       }
+
+       /**
+        * The parent tag of this {@link SearchableTag}, or NULL if none.
+        * 
+        * @return the parent or NULL
+        */
+       public SearchableTag getParent() {
+               return parent;
        }
 
        /**