| 1 | package be.nikiroo.fanfix.searchable; |
| 2 | |
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.supported.SupportType;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.StringUtils;
| 24 | |
| 25 | /** |
| 26 | * A {@link BasicSearchable} for Fanfiction.NET. |
| 27 | * |
| 28 | * @author niki |
| 29 | */ |
| 30 | class Fanfiction extends BasicSearchable { |
| 31 | static private String BASE_URL = "http://fanfiction.net/"; |
| 32 | |
| 33 | /** |
| 34 | * Create a new {@link Fanfiction}. |
| 35 | * |
| 36 | * @param type |
| 37 | * {@link SupportType#FANFICTION} |
| 38 | */ |
| 39 | public Fanfiction(SupportType type) { |
| 40 | super(type); |
| 41 | } |
| 42 | |
| 43 | @Override |
| 44 | public List<SearchableTag> getTags() throws IOException { |
| 45 | String storiesName = null; |
| 46 | String crossoversName = null; |
| 47 | Map<String, String> stories = new HashMap<String, String>(); |
| 48 | Map<String, String> crossovers = new HashMap<String, String>(); |
| 49 | |
| 50 | Document mainPage = load(BASE_URL, true); |
| 51 | Element menu = mainPage.getElementsByClass("dropdown").first(); |
| 52 | if (menu != null) { |
| 53 | Element ul = menu.getElementsByClass("dropdown-menu").first(); |
| 54 | if (ul != null) { |
| 55 | Map<String, String> currentList = null; |
| 56 | for (Element li : ul.getElementsByTag("li")) { |
| 57 | if (li.hasClass("disabled")) { |
| 58 | if (storiesName == null) { |
| 59 | storiesName = li.text(); |
| 60 | currentList = stories; |
| 61 | } else { |
| 62 | crossoversName = li.text(); |
| 63 | currentList = crossovers; |
| 64 | } |
| 65 | } else if (currentList != null) { |
| 66 | Element a = li.getElementsByTag("a").first(); |
| 67 | if (a != null) { |
| 68 | currentList.put(a.absUrl("href"), a.text()); |
| 69 | } |
| 70 | } |
| 71 | } |
| 72 | } |
| 73 | } |
| 74 | |
| 75 | List<SearchableTag> tags = new ArrayList<SearchableTag>(); |
| 76 | |
| 77 | if (storiesName != null) { |
| 78 | SearchableTag tag = new SearchableTag(null, storiesName, false); |
| 79 | for (String id : stories.keySet()) { |
| 80 | tag.add(new SearchableTag(id, stories.get(id), false, false)); |
| 81 | } |
| 82 | tags.add(tag); |
| 83 | } |
| 84 | |
| 85 | if (crossoversName != null) { |
| 86 | SearchableTag tag = new SearchableTag(null, crossoversName, false); |
| 87 | for (String id : crossovers.keySet()) { |
| 88 | tag.add(new SearchableTag(id, crossovers.get(id), false, false)); |
| 89 | } |
| 90 | tags.add(tag); |
| 91 | } |
| 92 | |
| 93 | return tags; |
| 94 | } |
| 95 | |
| 96 | @Override |
| 97 | public void fillTag(SearchableTag tag) throws IOException { |
| 98 | if (tag.getId() == null || tag.isComplete()) { |
| 99 | return; |
| 100 | } |
| 101 | |
| 102 | Document doc = load(tag.getId(), false); |
| 103 | Element list = doc.getElementById("list_output"); |
| 104 | if (list != null) { |
| 105 | Element table = list.getElementsByTag("table").first(); |
| 106 | if (table != null) { |
| 107 | for (Element div : table.getElementsByTag("div")) { |
| 108 | Element a = div.getElementsByTag("a").first(); |
| 109 | Element span = div.getElementsByTag("span").first(); |
| 110 | |
| 111 | if (a != null) { |
| 112 | String subid = a.absUrl("href"); |
| 113 | boolean crossoverSubtag = subid |
| 114 | .contains("/crossovers/"); |
| 115 | |
| 116 | SearchableTag subtag = new SearchableTag(subid, |
| 117 | a.text(), !crossoverSubtag, !crossoverSubtag); |
| 118 | |
| 119 | tag.add(subtag); |
| 120 | if (span != null) { |
| 121 | String nr = span.text(); |
| 122 | if (nr.startsWith("(")) { |
| 123 | nr = nr.substring(1); |
| 124 | } |
| 125 | if (nr.endsWith(")")) { |
| 126 | nr = nr.substring(0, nr.length() - 1); |
| 127 | } |
| 128 | nr = nr.trim(); |
| 129 | |
| 130 | // TODO: fix toNumber/fromNumber |
| 131 | nr = nr.replaceAll("\\.[0-9]*", ""); |
| 132 | |
| 133 | subtag.setCount(StringUtils.toNumber(nr)); |
| 134 | } |
| 135 | } |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | tag.setComplete(true); |
| 141 | } |
| 142 | |
| 143 | @Override |
| 144 | public List<MetaData> search(String search, int page) throws IOException { |
| 145 | String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8"); |
| 146 | String url = BASE_URL + "search/?ready=1&type=story&keywords=" |
| 147 | + encoded + "&ppage=" + page; |
| 148 | |
| 149 | return getStories(url, null, null); |
| 150 | } |
| 151 | |
| 152 | @Override |
| 153 | public List<MetaData> search(SearchableTag tag, int page) |
| 154 | throws IOException { |
| 155 | List<MetaData> metas = new ArrayList<MetaData>(); |
| 156 | |
| 157 | String url = tag.getId(); |
| 158 | if (url != null) { |
| 159 | if (page > 1) { |
| 160 | int pos = url.indexOf("&p="); |
| 161 | if (pos >= 0) { |
| 162 | url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page |
| 163 | + "$2"); |
| 164 | } else { |
| 165 | url += "&p=" + page; |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | Document doc = load(url, false); |
| 170 | |
| 171 | // Update the pages number if needed |
| 172 | if (tag.getPages() < 0 && tag.isLeaf()) { |
| 173 | tag.setPages(getPages(doc)); |
| 174 | } |
| 175 | |
| 176 | // Find out the full subjects (including parents) |
| 177 | String subjects = ""; |
| 178 | for (SearchableTag t = tag; t != null; t = t.getParent()) { |
| 179 | if (!subjects.isEmpty()) { |
| 180 | subjects += ", "; |
| 181 | } |
| 182 | subjects += t.getName(); |
| 183 | } |
| 184 | |
| 185 | metas = getStories(url, doc, subjects); |
| 186 | } |
| 187 | |
| 188 | return metas; |
| 189 | } |
| 190 | |
| 191 | @Override |
| 192 | public int searchPages(String search) throws IOException { |
| 193 | String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8"); |
| 194 | String url = BASE_URL + "search/?ready=1&type=story&keywords=" |
| 195 | + encoded; |
| 196 | |
| 197 | return getPages(load(url, false)); |
| 198 | } |
| 199 | |
| 200 | @Override |
| 201 | public int searchPages(SearchableTag tag) throws IOException { |
| 202 | if (tag.isLeaf()) { |
| 203 | String url = tag.getId(); |
| 204 | return getPages(load(url, false)); |
| 205 | } |
| 206 | |
| 207 | return 0; |
| 208 | } |
| 209 | |
| 210 | /** |
| 211 | * Return the number of pages in this stories result listing. |
| 212 | * |
| 213 | * @param doc |
| 214 | * the document |
| 215 | * |
| 216 | * @return the number of pages or -1 if unknown |
| 217 | */ |
| 218 | private int getPages(Document doc) { |
| 219 | int pages = -1; |
| 220 | |
| 221 | if (doc != null) { |
| 222 | Element center = doc.getElementsByTag("center").first(); |
| 223 | if (center != null) { |
| 224 | for (Element a : center.getElementsByTag("a")) { |
| 225 | if (a.absUrl("href").contains("&p=")) { |
| 226 | int thisLinkPages = -1; |
| 227 | try { |
| 228 | String[] tab = a.absUrl("href").split("="); |
| 229 | tab = tab[tab.length - 1].split("&"); |
| 230 | thisLinkPages = Integer |
| 231 | .parseInt(tab[tab.length - 1]); |
| 232 | } catch (Exception e) { |
| 233 | } |
| 234 | |
| 235 | pages = Math.max(pages, thisLinkPages); |
| 236 | } |
| 237 | } |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | return pages; |
| 242 | } |
| 243 | |
| 244 | /** |
| 245 | * Fetch the stories from the given page. |
| 246 | * |
| 247 | * @param sourceUrl |
| 248 | * the url of the document |
| 249 | * @param doc |
| 250 | * the document to use (if NULL, will be loaded from |
| 251 | * <tt>sourceUrl</tt>) |
| 252 | * @param mainSubject |
| 253 | * the main subject (the anime/book/movie item related to the |
| 254 | * stories, like "MLP" or "Doctor Who"), or NULL if none |
| 255 | * |
| 256 | * @return the stories found in it |
| 257 | * |
| 258 | * @throws IOException |
| 259 | * in case of I/O errors |
| 260 | */ |
| 261 | private List<MetaData> getStories(String sourceUrl, Document doc, |
| 262 | String mainSubject) throws IOException { |
| 263 | List<MetaData> metas = new ArrayList<MetaData>(); |
| 264 | |
| 265 | if (doc == null) { |
| 266 | doc = load(sourceUrl, false); |
| 267 | } |
| 268 | |
| 269 | for (Element story : doc.getElementsByClass("z-list")) { |
| 270 | MetaData meta = new MetaData(); |
| 271 | meta.setImageDocument(false); |
| 272 | meta.setSource(getType().getSourceName()); |
| 273 | meta.setPublisher(getType().getSourceName()); |
| 274 | meta.setType(getType().toString()); |
| 275 | |
| 276 | // Title, URL, Cover |
| 277 | Element stitle = story.getElementsByClass("stitle").first(); |
| 278 | if (stitle != null) { |
| 279 | meta.setTitle(stitle.text()); |
| 280 | meta.setUrl(stitle.absUrl("href")); |
| 281 | meta.setUuid(meta.getUrl()); |
| 282 | Element cover = stitle.getElementsByTag("img").first(); |
| 283 | if (cover != null) { |
| 284 | // note: see data-original if needed? |
| 285 | String coverUrl = cover.absUrl("src"); |
| 286 | |
| 287 | try { |
| 288 | InputStream in = Instance.getInstance().getCache().open(new URL(coverUrl), getSupport(), true); |
| 289 | try { |
| 290 | meta.setCover(new Image(in)); |
| 291 | } finally { |
| 292 | in.close(); |
| 293 | } |
| 294 | } catch (Exception e) { |
| 295 | // Should not happen on Fanfiction.net |
| 296 | Instance.getInstance().getTraceHandler().error(new Exception( |
| 297 | "Cannot download cover for Fanfiction story in search mode: " + meta.getTitle(), e)); |
| 298 | } |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | // Author |
| 303 | Elements as = story.getElementsByTag("a"); |
| 304 | if (as.size() > 1) { |
| 305 | meta.setAuthor(as.get(1).text()); |
| 306 | } |
| 307 | |
| 308 | // Tags (concatenated text), published date, updated date, Resume |
| 309 | String tags = ""; |
| 310 | List<String> tagList = new ArrayList<String>(); |
| 311 | Elements divs = story.getElementsByTag("div"); |
| 312 | if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) { |
| 313 | String resume = divs.get(1).text(); |
| 314 | if (divs.size() > 2) { |
| 315 | tags = divs.get(2).text(); |
| 316 | resume = resume.substring(0, |
| 317 | resume.length() - tags.length()).trim(); |
| 318 | |
| 319 | for (Element d : divs.get(2).getElementsByAttribute( |
| 320 | "data-xutime")) { |
| 321 | String secs = d.attr("data-xutime"); |
| 322 | try { |
| 323 | String date = new SimpleDateFormat("yyyy-MM-dd") |
| 324 | .format(new Date( |
| 325 | Long.parseLong(secs) * 1000)); |
| 326 | // (updated, ) published |
| 327 | if (meta.getDate() != null) { |
| 328 | tagList.add("Updated: " + meta.getDate()); |
| 329 | } |
| 330 | meta.setDate(date); |
| 331 | } catch (Exception e) { |
| 332 | } |
| 333 | } |
| 334 | } |
| 335 | |
| 336 | meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0, |
| 337 | Instance.getInstance().getTrans().getString(StringId.DESCRIPTION), resume)); |
| 338 | } |
| 339 | |
| 340 | // How are the tags ordered? |
| 341 | // We have "Rated: xx", then the language, then all other tags |
| 342 | // If the subject(s) is/are present, they are before "Rated: xx" |
| 343 | |
| 344 | // //////////// |
| 345 | // Examples: // |
| 346 | // //////////// |
| 347 | |
| 348 | // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters: |
| 349 | // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.] |
| 350 | |
| 351 | // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters: |
| 352 | // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7, |
| 353 | // Published: 4/2] |
| 354 | |
| 355 | // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance, |
| 356 | // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1, |
| 357 | // Published: 9/1/2016] |
| 358 | |
| 359 | boolean rated = false; |
| 360 | boolean isLang = false; |
| 361 | String subject = mainSubject == null ? "" : mainSubject; |
| 362 | String[] tab = tags.split(" *- *"); |
| 363 | for (int i = 0; i < tab.length; i++) { |
| 364 | String tag = tab[i]; |
| 365 | if (tag.startsWith("Rated: ")) { |
| 366 | rated = true; |
| 367 | } |
| 368 | |
| 369 | if (!rated) { |
| 370 | if (!subject.isEmpty()) { |
| 371 | subject += ", "; |
| 372 | } |
| 373 | subject += tag; |
| 374 | } else if (isLang) { |
| 375 | meta.setLang(tag); |
| 376 | isLang = false; |
| 377 | } else { |
| 378 | if (tag.contains(":")) { |
| 379 | // Handle special tags: |
| 380 | if (tag.startsWith("Words: ")) { |
| 381 | try { |
| 382 | meta.setWords(Long.parseLong(tag |
| 383 | .substring("Words: ".length()) |
| 384 | .replace(",", "").trim())); |
| 385 | } catch (Exception e) { |
| 386 | } |
| 387 | } else if (tag.startsWith("Rated: ")) { |
| 388 | tagList.add(tag); |
| 389 | } |
| 390 | } else { |
| 391 | // Normal tags are "/"-separated |
| 392 | for (String t : tag.split("/")) { |
| 393 | tagList.add(t); |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | if (tag.startsWith("Rated: ")) { |
| 398 | isLang = true; |
| 399 | } |
| 400 | } |
| 401 | } |
| 402 | |
| 403 | meta.setSubject(subject); |
| 404 | meta.setTags(tagList); |
| 405 | |
| 406 | metas.add(meta); |
| 407 | } |
| 408 | |
| 409 | return metas; |
| 410 | } |
| 411 | } |