searchable/Fanfiction.java

   1 package be.nikiroo.fanfix.searchable;
   2
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.supported.SupportType;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.StringUtils;
  24
  25 /**
  26  * A {@link BasicSearchable} for Fanfiction.NET.
  27  *
  28  * @author niki
  29  */
  30 class Fanfiction extends BasicSearchable {
  31         static private String BASE_URL = "http://fanfiction.net/";
  32
  33         /**
  34          * Create a new {@link Fanfiction}.
  35          *
  36          * @param type
  37          *            {@link SupportType#FANFICTION}
  38          */
  39         public Fanfiction(SupportType type) {
  40                 super(type);
  41         }
  42
  43         @Override
  44         public List<SearchableTag> getTags() throws IOException {
  45                 String storiesName = null;
  46                 String crossoversName = null;
  47                 Map<String, String> stories = new HashMap<String, String>();
  48                 Map<String, String> crossovers = new HashMap<String, String>();
  49
  50                 Document mainPage = load(BASE_URL, true);
  51                 Element menu = mainPage.getElementsByClass("dropdown").first();
  52                 if (menu != null) {
  53                         Element ul = menu.getElementsByClass("dropdown-menu").first();
  54                         if (ul != null) {
  55                                 Map<String, String> currentList = null;
  56                                 for (Element li : ul.getElementsByTag("li")) {
  57                                         if (li.hasClass("disabled")) {
  58                                                 if (storiesName == null) {
  59                                                         storiesName = li.text();
  60                                                         currentList = stories;
  61                                                 } else {
  62                                                         crossoversName = li.text();
  63                                                         currentList = crossovers;
  64                                                 }
  65                                         } else if (currentList != null) {
  66                                                 Element a = li.getElementsByTag("a").first();
  67                                                 if (a != null) {
  68                                                         currentList.put(a.absUrl("href"), a.text());
  69                                                 }
  70                                         }
  71                                 }
  72                         }
  73                 }
  74
  75                 List<SearchableTag> tags = new ArrayList<SearchableTag>();
  76
  77                 if (storiesName != null) {
  78                         SearchableTag tag = new SearchableTag(null, storiesName, false);
  79                         for (String id : stories.keySet()) {
  80                                 tag.add(new SearchableTag(id, stories.get(id), false, false));
  81                         }
  82                         tags.add(tag);
  83                 }
  84
  85                 if (crossoversName != null) {
  86                         SearchableTag tag = new SearchableTag(null, crossoversName, false);
  87                         for (String id : crossovers.keySet()) {
  88                                 tag.add(new SearchableTag(id, crossovers.get(id), false, false));
  89                         }
  90                         tags.add(tag);
  91                 }
  92
  93                 return tags;
  94         }
  95
  96         @Override
  97         public void fillTag(SearchableTag tag) throws IOException {
  98                 if (tag.getId() == null || tag.isComplete()) {
  99                         return;
 100                 }
 101
 102                 Document doc = load(tag.getId(), false);
 103                 Element list = doc.getElementById("list_output");
 104                 if (list != null) {
 105                         Element table = list.getElementsByTag("table").first();
 106                         if (table != null) {
 107                                 for (Element div : table.getElementsByTag("div")) {
 108                                         Element a = div.getElementsByTag("a").first();
 109                                         Element span = div.getElementsByTag("span").first();
 110
 111                                         if (a != null) {
 112                                                 String subid = a.absUrl("href");
 113                                                 boolean crossoverSubtag = subid
 114                                                                 .contains("/crossovers/");
 115
 116                                                 SearchableTag subtag = new SearchableTag(subid,
 117                                                                 a.text(), !crossoverSubtag, !crossoverSubtag);
 118
 119                                                 tag.add(subtag);
 120                                                 if (span != null) {
 121                                                         String nr = span.text();
 122                                                         if (nr.startsWith("(")) {
 123                                                                 nr = nr.substring(1);
 124                                                         }
 125                                                         if (nr.endsWith(")")) {
 126                                                                 nr = nr.substring(0, nr.length() - 1);
 127                                                         }
 128                                                         nr = nr.trim();
 129
 130                                                         // TODO: fix toNumber/fromNumber
 131                                                         nr = nr.replaceAll("\\.[0-9]*", "");
 132
 133                                                         subtag.setCount(StringUtils.toNumber(nr));
 134                                                 }
 135                                         }
 136                                 }
 137                         }
 138                 }
 139
 140                 tag.setComplete(true);
 141         }
 142
 143         @Override
 144         public List<MetaData> search(String search, int page) throws IOException {
 145                 String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8");
 146                 String url = BASE_URL + "search/?ready=1&type=story&keywords="
 147                                 + encoded + "&ppage=" + page;
 148
 149                 return getStories(url, null, null);
 150         }
 151
 152         @Override
 153         public List<MetaData> search(SearchableTag tag, int page)
 154                         throws IOException {
 155                 List<MetaData> metas = new ArrayList<MetaData>();
 156
 157                 String url = tag.getId();
 158                 if (url != null) {
 159                         if (page > 1) {
 160                                 int pos = url.indexOf("&p=");
 161                                 if (pos >= 0) {
 162                                         url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page
 163                                                         + "$2");
 164                                 } else {
 165                                         url += "&p=" + page;
 166                                 }
 167                         }
 168
 169                         Document doc = load(url, false);
 170
 171                         // Update the pages number if needed
 172                         if (tag.getPages() < 0 && tag.isLeaf()) {
 173                                 tag.setPages(getPages(doc));
 174                         }
 175
 176                         // Find out the full subjects (including parents)
 177                         String subjects = "";
 178                         for (SearchableTag t = tag; t != null; t = t.getParent()) {
 179                                 if (!subjects.isEmpty()) {
 180                                         subjects += ", ";
 181                                 }
 182                                 subjects += t.getName();
 183                         }
 184
 185                         metas = getStories(url, doc, subjects);
 186                 }
 187
 188                 return metas;
 189         }
 190
 191         @Override
 192         public int searchPages(String search) throws IOException {
 193                 String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8");
 194                 String url = BASE_URL + "search/?ready=1&type=story&keywords="
 195                                 + encoded;
 196
 197                 return getPages(load(url, false));
 198         }
 199
 200         @Override
 201         public int searchPages(SearchableTag tag) throws IOException {
 202                 if (tag.isLeaf()) {
 203                         String url = tag.getId();
 204                         return getPages(load(url, false));
 205                 }
 206
 207                 return 0;
 208         }
 209
 210         /**
 211          * Return the number of pages in this stories result listing.
 212          *
 213          * @param doc
 214          *            the document
 215          *
 216          * @return the number of pages or -1 if unknown
 217          */
 218         private int getPages(Document doc) {
 219                 int pages = -1;
 220
 221                 if (doc != null) {
 222                         Element center = doc.getElementsByTag("center").first();
 223                         if (center != null) {
 224                                 for (Element a : center.getElementsByTag("a")) {
 225                                         if (a.absUrl("href").contains("&p=")) {
 226                                                 int thisLinkPages = -1;
 227                                                 try {
 228                                                         String[] tab = a.absUrl("href").split("=");
 229                                                         tab = tab[tab.length - 1].split("&");
 230                                                         thisLinkPages = Integer
 231                                                                         .parseInt(tab[tab.length - 1]);
 232                                                 } catch (Exception e) {
 233                                                 }
 234
 235                                                 pages = Math.max(pages, thisLinkPages);
 236                                         }
 237                                 }
 238                         }
 239                 }
 240
 241                 return pages;
 242         }
 243
 244         /**
 245          * Fetch the stories from the given page.
 246          *
 247          * @param sourceUrl
 248          *            the url of the document
 249          * @param doc
 250          *            the document to use (if NULL, will be loaded from
 251          *            <tt>sourceUrl</tt>)
 252          * @param mainSubject
 253          *            the main subject (the anime/book/movie item related to the
 254          *            stories, like "MLP" or "Doctor Who"), or NULL if none
 255          *
 256          * @return the stories found in it
 257          *
 258          * @throws IOException
 259          *             in case of I/O errors
 260          */
 261         private List<MetaData> getStories(String sourceUrl, Document doc,
 262                         String mainSubject) throws IOException {
 263                 List<MetaData> metas = new ArrayList<MetaData>();
 264
 265                 if (doc == null) {
 266                         doc = load(sourceUrl, false);
 267                 }
 268
 269                 for (Element story : doc.getElementsByClass("z-list")) {
 270                         MetaData meta = new MetaData();
 271                         meta.setImageDocument(false);
 272                         meta.setSource(getType().getSourceName());
 273                         meta.setPublisher(getType().getSourceName());
 274                         meta.setType(getType().toString());
 275
 276                         // Title, URL, Cover
 277                         Element stitle = story.getElementsByClass("stitle").first();
 278                         if (stitle != null) {
 279                                 meta.setTitle(stitle.text());
 280                                 meta.setUrl(stitle.absUrl("href"));
 281                                 meta.setUuid(meta.getUrl());
 282                                 Element cover = stitle.getElementsByTag("img").first();
 283                                 if (cover != null) {
 284                                         // note: see data-original if needed?
 285                                         String coverUrl = cover.absUrl("src");
 286
 287                                         try {
 288                                                 InputStream in = Instance.getInstance().getCache().open(new URL(coverUrl), getSupport(), true);
 289                                                 try {
 290                                                         Image img = new Image(in);
 291                                                         if (img.getSize() == 0) {
 292                                                                 img.close();
 293                                                                 throw new IOException(
 294                                                                                 "Empty image not accepted");
 295                                                         }
 296                                                         meta.setCover(img);
 297                                                 } finally {
 298                                                         in.close();
 299                                                 }
 300                                         } catch (Exception e) {
 301                                                 // Should not happen on Fanfiction.net
 302                                                 Instance.getInstance().getTraceHandler().error(new Exception(
 303                                                                 "Cannot download cover for Fanfiction story in search mode: " + meta.getTitle(), e));
 304                                         }
 305                                 }
 306                         }
 307
 308                         // Author
 309                         Elements as = story.getElementsByTag("a");
 310                         if (as.size() > 1) {
 311                                 meta.setAuthor(as.get(1).text());
 312                         }
 313
 314                         // Tags (concatenated text), published date, updated date, Resume
 315                         String tags = "";
 316                         List<String> tagList = new ArrayList<String>();
 317                         Elements divs = story.getElementsByTag("div");
 318                         if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
 319                                 String resume = divs.get(1).text();
 320                                 if (divs.size() > 2) {
 321                                         tags = divs.get(2).text();
 322                                         resume = resume.substring(0,
 323                                                         resume.length() - tags.length()).trim();
 324
 325                                         for (Element d : divs.get(2).getElementsByAttribute(
 326                                                         "data-xutime")) {
 327                                                 String secs = d.attr("data-xutime");
 328                                                 try {
 329                                                         String date = new SimpleDateFormat("yyyy-MM-dd")
 330                                                                         .format(new Date(
 331                                                                                         Long.parseLong(secs) * 1000));
 332                                                         // (updated, ) published
 333                                                         if (meta.getDate() != null) {
 334                                                                 tagList.add("Updated: " + meta.getDate());
 335                                                         }
 336                                                         meta.setDate(date);
 337                                                 } catch (Exception e) {
 338                                                 }
 339                                         }
 340                                 }
 341
 342                                 meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0,
 343                                                 Instance.getInstance().getTrans().getString(StringId.DESCRIPTION), resume));
 344                         }
 345
 346                         // How are the tags ordered?
 347                         // We have "Rated: xx", then the language, then all other tags
 348                         // If the subject(s) is/are present, they are before "Rated: xx"
 349
 350                         // ////////////
 351                         // Examples: //
 352                         // ////////////
 353
 354                         // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters:
 355                         // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.]
 356
 357                         // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters:
 358                         // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7,
 359                         // Published: 4/2]
 360
 361                         // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance,
 362                         // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1,
 363                         // Published: 9/1/2016]
 364
 365                         boolean rated = false;
 366                         boolean isLang = false;
 367                         String subject = mainSubject == null ? "" : mainSubject;
 368                         String[] tab = tags.split("  *-  *");
 369                         for (int i = 0; i < tab.length; i++) {
 370                                 String tag = tab[i];
 371                                 if (tag.startsWith("Rated: ")) {
 372                                         rated = true;
 373                                 }
 374
 375                                 if (!rated) {
 376                                         if (!subject.isEmpty()) {
 377                                                 subject += ", ";
 378                                         }
 379                                         subject += tag;
 380                                 } else if (isLang) {
 381                                         meta.setLang(tag);
 382                                         isLang = false;
 383                                 } else {
 384                                         if (tag.contains(":")) {
 385                                                 // Handle special tags:
 386                                                 if (tag.startsWith("Words: ")) {
 387                                                         try {
 388                                                                 meta.setWords(Long.parseLong(tag
 389                                                                                 .substring("Words: ".length())
 390                                                                                 .replace(",", "").trim()));
 391                                                         } catch (Exception e) {
 392                                                         }
 393                                                 } else if (tag.startsWith("Rated: ")) {
 394                                                         tagList.add(tag);
 395                                                 }
 396                                         } else {
 397                                                 // Normal tags are "/"-separated
 398                                                 for (String t : tag.split("/")) {
 399                                                         tagList.add(t);
 400                                                 }
 401                                         }
 402
 403                                         if (tag.startsWith("Rated: ")) {
 404                                                 isLang = true;
 405                                         }
 406                                 }
 407                         }
 408
 409                         meta.setSubject(subject);
 410                         meta.setTags(tagList);
 411
 412                         metas.add(meta);
 413                 }
 414
 415                 return metas;
 416         }
 417 }