be/nikiroo/fanfix/searchable/Fanfiction.java

   1 package be.nikiroo.fanfix.searchable;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.URL;
   6 import java.net.URLEncoder;
   7 import java.text.SimpleDateFormat;
   8 import java.util.ArrayList;
   9 import java.util.Date;
  10 import java.util.HashMap;
  11 import java.util.List;
  12 import java.util.Map;
  13
  14 import org.jsoup.nodes.Document;
  15 import org.jsoup.nodes.Element;
  16 import org.jsoup.select.Elements;
  17
  18 import be.nikiroo.fanfix.Instance;
  19 import be.nikiroo.fanfix.bundles.StringId;
  20 import be.nikiroo.fanfix.data.MetaData;
  21 import be.nikiroo.fanfix.supported.SupportType;
  22 import be.nikiroo.utils.Image;
  23
  24 /**
  25  * A {@link BasicSearchable} for Fanfiction.NET.
  26  *
  27  * @author niki
  28  */
  29 class Fanfiction extends BasicSearchable {
  30         static private String BASE_URL = "http://fanfiction.net/";
  31
  32         /**
  33          * Create a new {@link Fanfiction}.
  34          *
  35          * @param type
  36          *            {@link SupportType#FANFICTION}
  37          */
  38         public Fanfiction(SupportType type) {
  39                 super(type);
  40         }
  41
  42         @Override
  43         public List<SearchableTag> getTags() throws IOException {
  44                 String storiesName = null;
  45                 String crossoversName = null;
  46                 Map<String, String> stories = new HashMap<String, String>();
  47                 Map<String, String> crossovers = new HashMap<String, String>();
  48
  49                 Document mainPage = load(BASE_URL, true);
  50                 Element menu = mainPage.getElementsByClass("dropdown").first();
  51                 if (menu != null) {
  52                         Element ul = menu.getElementsByClass("dropdown-menu").first();
  53                         if (ul != null) {
  54                                 Map<String, String> currentList = null;
  55                                 for (Element li : ul.getElementsByTag("li")) {
  56                                         if (li.hasClass("disabled")) {
  57                                                 if (storiesName == null) {
  58                                                         storiesName = li.text();
  59                                                         currentList = stories;
  60                                                 } else {
  61                                                         crossoversName = li.text();
  62                                                         currentList = crossovers;
  63                                                 }
  64                                         } else if (currentList != null) {
  65                                                 Element a = li.getElementsByTag("a").first();
  66                                                 if (a != null) {
  67                                                         currentList.put(a.absUrl("href"), a.text());
  68                                                 }
  69                                         }
  70                                 }
  71                         }
  72                 }
  73
  74                 List<SearchableTag> tags = new ArrayList<SearchableTag>();
  75
  76                 if (storiesName != null) {
  77                         SearchableTag tag = new SearchableTag(null, storiesName, false);
  78                         for (String id : stories.keySet()) {
  79                                 tag.add(new SearchableTag(id, stories.get(id), true, false));
  80                         }
  81                         tags.add(tag);
  82                 }
  83
  84                 if (crossoversName != null) {
  85                         SearchableTag tag = new SearchableTag(null, crossoversName, false);
  86                         for (String id : crossovers.keySet()) {
  87                                 tag.add(new SearchableTag(id, crossovers.get(id), false, false));
  88                         }
  89                         tags.add(tag);
  90                 }
  91
  92                 return tags;
  93         }
  94
  95         @Override
  96         protected void fillTag(SearchableTag tag) throws IOException {
  97                 if (tag.getId() == null || tag.isComplete()) {
  98                         return;
  99                 }
 100
 101                 Document doc = load(tag.getId(), false);
 102                 Element list = doc.getElementById("list_output");
 103                 if (list != null) {
 104                         Element table = list.getElementsByTag("table").first();
 105                         if (table != null) {
 106                                 for (Element div : table.getElementsByTag("div")) {
 107                                         Element a = div.getElementsByTag("a").first();
 108                                         Element span = div.getElementsByTag("span").first();
 109
 110                                         if (a != null) {
 111                                                 String subid = a.absUrl("href");
 112                                                 boolean crossoverSubtag = subid
 113                                                                 .contains("/crossovers/");
 114
 115                                                 SearchableTag subtag = new SearchableTag(subid,
 116                                                                 a.text(), !crossoverSubtag, !crossoverSubtag);
 117
 118                                                 tag.add(subtag);
 119                                                 if (span != null) {
 120                                                         String nr = span.text();
 121                                                         if (nr.startsWith("(")) {
 122                                                                 nr = nr.substring(1);
 123                                                         }
 124                                                         if (nr.endsWith(")")) {
 125                                                                 nr = nr.substring(0, nr.length() - 1);
 126                                                         }
 127                                                         nr = nr.trim();
 128
 129                                                         long count = 0;
 130                                                         try {
 131                                                                 if (nr.toLowerCase().endsWith("m")) {
 132                                                                         count = Long.parseLong(nr.substring(0,
 133                                                                                         nr.length() - 1).trim());
 134                                                                         count *= 1000000;
 135                                                                 } else if (nr.toLowerCase().endsWith("k")) {
 136                                                                         count = Long.parseLong(nr.substring(0,
 137                                                                                         nr.length() - 1).trim());
 138                                                                         count *= 1000;
 139                                                                 } else {
 140                                                                         count = Long.parseLong(nr);
 141                                                                 }
 142                                                         } catch (NumberFormatException pe) {
 143                                                         }
 144
 145                                                         subtag.setCount(count);
 146                                                 }
 147                                         }
 148                                 }
 149                         }
 150                 }
 151
 152                 tag.setComplete(true);
 153         }
 154
 155         @Override
 156         public List<MetaData> search(String search) throws IOException {
 157                 String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8");
 158                 return getStories(
 159                                 "http://fanfiction.net/search/?ready=1&type=story&keywords="
 160                                                 + encoded, null, null);
 161         }
 162
 163         @Override
 164         public List<MetaData> search(SearchableTag tag, int page)
 165                         throws IOException {
 166                 List<MetaData> metas = new ArrayList<MetaData>();
 167
 168                 String url = tag.getId();
 169                 if (url != null) {
 170                         if (page > 1) {
 171                                 int pos = url.indexOf("&p=");
 172                                 if (pos >= 0) {
 173                                         url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page
 174                                                         + "$2");
 175                                 } else {
 176                                         url += "&p=" + page;
 177                                 }
 178                         }
 179
 180                         Document doc = load(url, false);
 181
 182                         // Update the pages number if needed
 183                         if (tag.getPages() < 0) {
 184                                 tag.setPages(getPages(doc));
 185                         }
 186
 187                         // Find out the full subjects (including parents)
 188                         String subjects = "";
 189                         for (SearchableTag t = tag; t != null; t = t.getParent()) {
 190                                 if (!subjects.isEmpty()) {
 191                                         subjects += ", ";
 192                                 }
 193                                 subjects += t.getName();
 194                         }
 195
 196                         metas = getStories(url, doc, subjects);
 197                 }
 198
 199                 return metas;
 200         }
 201
 202         /**
 203          * Return the number of pages in this stories result listing.
 204          *
 205          * @param doc
 206          *            the document
 207          *
 208          * @return the number of pages or -1 if unknown
 209          *
 210          * @throws IOException
 211          *             in case of I/O errors
 212          */
 213         private int getPages(Document doc) throws IOException {
 214                 int pages = -1;
 215
 216                 if (doc != null) {
 217                         Element center = doc.getElementsByTag("center").first();
 218                         if (center != null) {
 219                                 for (Element a : center.getElementsByTag("a")) {
 220                                         if (a.absUrl("href").contains("&p=")) {
 221                                                 int thisLinkPages = -1;
 222                                                 try {
 223                                                         String[] tab = a.absUrl("href").split("=");
 224                                                         tab = tab[tab.length - 1].split("&");
 225                                                         thisLinkPages = Integer
 226                                                                         .parseInt(tab[tab.length - 1]);
 227                                                 } catch (Exception e) {
 228                                                 }
 229
 230                                                 pages = Math.max(pages, thisLinkPages);
 231                                         }
 232                                 }
 233                         }
 234                 }
 235
 236                 return pages;
 237         }
 238
 239         /**
 240          * Fetch the stories from the given page.
 241          *
 242          * @param sourceUrl
 243          *            the url of the document
 244          * @param doc
 245          *            the document to use (if NULL, will be loaded from
 246          *            <tt>sourceUrl</tt>)
 247          * @param mainSubject
 248          *            the main subject (the anime/book/movie item related to the
 249          *            stories, like "MLP" or "Doctor Who"), or NULL if none
 250          *
 251          * @return the stories found in it
 252          *
 253          * @throws IOException
 254          *             in case of I/O errors
 255          */
 256         private List<MetaData> getStories(String sourceUrl, Document doc,
 257                         String mainSubject) throws IOException {
 258                 List<MetaData> metas = new ArrayList<MetaData>();
 259
 260                 if (doc == null) {
 261                         doc = load(sourceUrl, false);
 262                 }
 263
 264                 for (Element story : doc.getElementsByClass("z-list")) {
 265                         MetaData meta = new MetaData();
 266                         meta.setImageDocument(false);
 267                         meta.setSource(getType().getSourceName());
 268
 269                         String subject = mainSubject == null ? "" : mainSubject;
 270                         List<String> tagList = new ArrayList<String>();
 271
 272                         Element stitle = story.getElementsByClass("stitle").first();
 273                         if (stitle != null) {
 274                                 meta.setTitle(stitle.text());
 275                                 meta.setUrl(stitle.absUrl("href"));
 276                                 Element cover = stitle.getElementsByTag("img").first();
 277                                 if (cover != null) {
 278                                         // note: see data-original if needed?
 279                                         String coverUrl = cover.absUrl("src");
 280
 281                                         try {
 282                                                 InputStream in = Instance.getCache().open(
 283                                                                 new URL(coverUrl), getSupport(), true);
 284                                                 try {
 285                                                         meta.setCover(new Image(in));
 286                                                 } finally {
 287                                                         in.close();
 288                                                 }
 289                                         } catch (Exception e) {
 290                                                 Instance.getTraceHandler()
 291                                                                 .error(new Exception(
 292                                                                                 "Cannot download cover for Fanfiction story in search mode",
 293                                                                                 e));
 294                                         }
 295                                 }
 296                         }
 297
 298                         Elements as = story.getElementsByTag("a");
 299                         if (as.size() > 1) {
 300                                 meta.setAuthor(as.get(1).text());
 301                         }
 302
 303                         String tags = "";
 304
 305                         Elements divs = story.getElementsByTag("div");
 306                         if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
 307                                 String resume = divs.get(1).text();
 308                                 if (divs.size() > 2) {
 309                                         tags = divs.get(2).text();
 310                                         resume = resume.substring(0,
 311                                                         resume.length() - tags.length()).trim();
 312
 313                                         for (Element d : divs.get(2).getElementsByAttribute(
 314                                                         "data-xutime")) {
 315                                                 String secs = d.attr("data-xutime");
 316                                                 try {
 317                                                         String date = new SimpleDateFormat("yyyy-MM-dd")
 318                                                                         .format(new Date(
 319                                                                                         Long.parseLong(secs) * 1000));
 320                                                         // (updated, ) published
 321                                                         if (meta.getDate() != null) {
 322                                                                 tagList.add("Updated: " + meta.getDate());
 323                                                         }
 324                                                         meta.setDate(date);
 325                                                 } catch (Exception e) {
 326                                                 }
 327                                         }
 328                                 }
 329
 330                                 meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0,
 331                                                 Instance.getTrans().getString(StringId.DESCRIPTION),
 332                                                 resume));
 333                         }
 334
 335                         // How are the tags ordered?
 336                         // We have "Rated: xx", then the language, then all other tags
 337                         // If the subject(s) is/are present, they are before "Rated: xx"
 338
 339                         // /////////////
 340                         // Examples: //
 341                         // /////////////
 342
 343                         // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters:
 344                         // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.]
 345
 346                         // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters:
 347                         // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7,
 348                         // Published:
 349                         // 4/2]
 350
 351                         // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance,
 352                         // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1,
 353                         // Published:
 354                         // 9/1/2016]
 355
 356                         boolean rated = false;
 357                         boolean isLang = false;
 358                         String[] tab = tags.split("  *-  *");
 359                         for (int i = 0; i < tab.length; i++) {
 360                                 String tag = tab[i];
 361                                 if (tag.startsWith("Rated: ")) {
 362                                         rated = true;
 363                                 }
 364
 365                                 if (!rated) {
 366                                         if (!subject.isEmpty()) {
 367                                                 subject += ", ";
 368                                         }
 369                                         subject += tag;
 370                                 } else if (isLang) {
 371                                         meta.setLang(tag);
 372                                         isLang = false;
 373                                 } else {
 374                                         if (tag.contains(":")) {
 375                                                 // Handle special tags:
 376                                                 if (tag.startsWith("Words: ")) {
 377                                                         try {
 378                                                                 meta.setWords(Long.parseLong(tag
 379                                                                                 .substring("Words: ".length())
 380                                                                                 .replace(",", "").trim()));
 381                                                         } catch (Exception e) {
 382                                                         }
 383                                                 } else if (tag.startsWith("Rated: ")) {
 384                                                         tagList.add(tag);
 385                                                 }
 386                                         } else {
 387                                                 for (String t : tag.split("/")) {
 388                                                         tagList.add(t);
 389                                                 }
 390                                         }
 391
 392                                         if (tag.startsWith("Rated: ")) {
 393                                                 isLang = true;
 394                                         }
 395                                 }
 396                         }
 397
 398                         meta.setSubject(subject);
 399                         meta.setTags(tagList);
 400
 401                         metas.add(meta);
 402                 }
 403
 404                 return metas;
 405         }
 406 }