| 1 | package be.nikiroo.fanfix.searchable; |
| 2 | |
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
| 13 | |
| 14 | import org.jsoup.nodes.Document; |
| 15 | import org.jsoup.nodes.Element; |
| 16 | import org.jsoup.select.Elements; |
| 17 | |
| 18 | import be.nikiroo.fanfix.Instance; |
| 19 | import be.nikiroo.fanfix.bundles.StringId; |
| 20 | import be.nikiroo.fanfix.data.MetaData; |
| 21 | import be.nikiroo.fanfix.supported.SupportType; |
| 22 | import be.nikiroo.utils.Image; |
| 23 | |
| 24 | /** |
| 25 | * A {@link BasicSearchable} for Fanfiction.NET. |
| 26 | * |
| 27 | * @author niki |
| 28 | */ |
| 29 | class Fanfiction extends BasicSearchable { |
| 30 | static private String BASE_URL = "http://fanfiction.net/"; |
| 31 | |
| 32 | /** |
| 33 | * Create a new {@link Fanfiction}. |
| 34 | * |
| 35 | * @param type |
| 36 | * {@link SupportType#FANFICTION} |
| 37 | */ |
| 38 | public Fanfiction(SupportType type) { |
| 39 | super(type); |
| 40 | } |
| 41 | |
| 42 | @Override |
| 43 | public List<SearchableTag> getTags() throws IOException { |
| 44 | String storiesName = null; |
| 45 | String crossoversName = null; |
| 46 | Map<String, String> stories = new HashMap<String, String>(); |
| 47 | Map<String, String> crossovers = new HashMap<String, String>(); |
| 48 | |
| 49 | Document mainPage = load(BASE_URL, true); |
| 50 | Element menu = mainPage.getElementsByClass("dropdown").first(); |
| 51 | if (menu != null) { |
| 52 | Element ul = menu.getElementsByClass("dropdown-menu").first(); |
| 53 | if (ul != null) { |
| 54 | Map<String, String> currentList = null; |
| 55 | for (Element li : ul.getElementsByTag("li")) { |
| 56 | if (li.hasClass("disabled")) { |
| 57 | if (storiesName == null) { |
| 58 | storiesName = li.text(); |
| 59 | currentList = stories; |
| 60 | } else { |
| 61 | crossoversName = li.text(); |
| 62 | currentList = crossovers; |
| 63 | } |
| 64 | } else if (currentList != null) { |
| 65 | Element a = li.getElementsByTag("a").first(); |
| 66 | if (a != null) { |
| 67 | currentList.put(a.absUrl("href"), a.text()); |
| 68 | } |
| 69 | } |
| 70 | } |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | List<SearchableTag> tags = new ArrayList<SearchableTag>(); |
| 75 | |
| 76 | if (storiesName != null) { |
| 77 | SearchableTag tag = new SearchableTag(null, storiesName, false); |
| 78 | for (String id : stories.keySet()) { |
| 79 | tag.add(new SearchableTag(id, stories.get(id), true, false)); |
| 80 | } |
| 81 | tags.add(tag); |
| 82 | } |
| 83 | |
| 84 | if (crossoversName != null) { |
| 85 | SearchableTag tag = new SearchableTag(null, crossoversName, false); |
| 86 | for (String id : crossovers.keySet()) { |
| 87 | tag.add(new SearchableTag(id, crossovers.get(id), false, false)); |
| 88 | } |
| 89 | tags.add(tag); |
| 90 | } |
| 91 | |
| 92 | return tags; |
| 93 | } |
| 94 | |
| 95 | @Override |
| 96 | protected void fillTag(SearchableTag tag) throws IOException { |
| 97 | if (tag.getId() == null || tag.isComplete()) { |
| 98 | return; |
| 99 | } |
| 100 | |
| 101 | Document doc = load(tag.getId(), false); |
| 102 | Element list = doc.getElementById("list_output"); |
| 103 | if (list != null) { |
| 104 | Element table = list.getElementsByTag("table").first(); |
| 105 | if (table != null) { |
| 106 | for (Element div : table.getElementsByTag("div")) { |
| 107 | Element a = div.getElementsByTag("a").first(); |
| 108 | Element span = div.getElementsByTag("span").first(); |
| 109 | |
| 110 | if (a != null) { |
| 111 | String subid = a.absUrl("href"); |
| 112 | boolean crossoverSubtag = subid |
| 113 | .contains("/crossovers/"); |
| 114 | |
| 115 | SearchableTag subtag = new SearchableTag(subid, |
| 116 | a.text(), !crossoverSubtag, !crossoverSubtag); |
| 117 | |
| 118 | tag.add(subtag); |
| 119 | if (span != null) { |
| 120 | String nr = span.text(); |
| 121 | if (nr.startsWith("(")) { |
| 122 | nr = nr.substring(1); |
| 123 | } |
| 124 | if (nr.endsWith(")")) { |
| 125 | nr = nr.substring(0, nr.length() - 1); |
| 126 | } |
| 127 | nr = nr.trim(); |
| 128 | |
| 129 | long count = 0; |
| 130 | try { |
| 131 | if (nr.toLowerCase().endsWith("m")) { |
| 132 | count = Long.parseLong(nr.substring(0, |
| 133 | nr.length() - 1).trim()); |
| 134 | count *= 1000000; |
| 135 | } else if (nr.toLowerCase().endsWith("k")) { |
| 136 | count = Long.parseLong(nr.substring(0, |
| 137 | nr.length() - 1).trim()); |
| 138 | count *= 1000; |
| 139 | } else { |
| 140 | count = Long.parseLong(nr); |
| 141 | } |
| 142 | } catch (NumberFormatException pe) { |
| 143 | } |
| 144 | |
| 145 | subtag.setCount(count); |
| 146 | } |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | tag.setComplete(true); |
| 153 | } |
| 154 | |
| 155 | @Override |
| 156 | public List<MetaData> search(String search) throws IOException { |
| 157 | String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8"); |
| 158 | return getStories( |
| 159 | "http://fanfiction.net/search/?ready=1&type=story&keywords=" |
| 160 | + encoded, null, null); |
| 161 | } |
| 162 | |
| 163 | @Override |
| 164 | public List<MetaData> search(SearchableTag tag, int page) |
| 165 | throws IOException { |
| 166 | List<MetaData> metas = new ArrayList<MetaData>(); |
| 167 | |
| 168 | String url = tag.getId(); |
| 169 | if (url != null) { |
| 170 | if (page > 1) { |
| 171 | int pos = url.indexOf("&p="); |
| 172 | if (pos >= 0) { |
| 173 | url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page |
| 174 | + "$2"); |
| 175 | } else { |
| 176 | url += "&p=" + page; |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | Document doc = load(url, false); |
| 181 | |
| 182 | // Update the pages number if needed |
| 183 | if (tag.getPages() < 0) { |
| 184 | tag.setPages(getPages(doc)); |
| 185 | } |
| 186 | |
| 187 | // Find out the full subjects (including parents) |
| 188 | String subjects = ""; |
| 189 | for (SearchableTag t = tag; t != null; t = t.getParent()) { |
| 190 | if (!subjects.isEmpty()) { |
| 191 | subjects += ", "; |
| 192 | } |
| 193 | subjects += t.getName(); |
| 194 | } |
| 195 | |
| 196 | metas = getStories(url, doc, subjects); |
| 197 | } |
| 198 | |
| 199 | return metas; |
| 200 | } |
| 201 | |
| 202 | /** |
| 203 | * Return the number of pages in this stories result listing. |
| 204 | * |
| 205 | * @param doc |
| 206 | * the document |
| 207 | * |
| 208 | * @return the number of pages or -1 if unknown |
| 209 | * |
| 210 | * @throws IOException |
| 211 | * in case of I/O errors |
| 212 | */ |
| 213 | private int getPages(Document doc) throws IOException { |
| 214 | int pages = -1; |
| 215 | |
| 216 | if (doc != null) { |
| 217 | Element center = doc.getElementsByTag("center").first(); |
| 218 | if (center != null) { |
| 219 | for (Element a : center.getElementsByTag("a")) { |
| 220 | if (a.absUrl("href").contains("&p=")) { |
| 221 | int thisLinkPages = -1; |
| 222 | try { |
| 223 | String[] tab = a.absUrl("href").split("="); |
| 224 | tab = tab[tab.length - 1].split("&"); |
| 225 | thisLinkPages = Integer |
| 226 | .parseInt(tab[tab.length - 1]); |
| 227 | } catch (Exception e) { |
| 228 | } |
| 229 | |
| 230 | pages = Math.max(pages, thisLinkPages); |
| 231 | } |
| 232 | } |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | return pages; |
| 237 | } |
| 238 | |
| 239 | /** |
| 240 | * Fetch the stories from the given page. |
| 241 | * |
| 242 | * @param sourceUrl |
| 243 | * the url of the document |
| 244 | * @param doc |
| 245 | * the document to use (if NULL, will be loaded from |
| 246 | * <tt>sourceUrl</tt>) |
| 247 | * @param mainSubject |
| 248 | * the main subject (the anime/book/movie item related to the |
| 249 | * stories, like "MLP" or "Doctor Who"), or NULL if none |
| 250 | * |
| 251 | * @return the stories found in it |
| 252 | * |
| 253 | * @throws IOException |
| 254 | * in case of I/O errors |
| 255 | */ |
| 256 | private List<MetaData> getStories(String sourceUrl, Document doc, |
| 257 | String mainSubject) throws IOException { |
| 258 | List<MetaData> metas = new ArrayList<MetaData>(); |
| 259 | |
| 260 | if (doc == null) { |
| 261 | doc = load(sourceUrl, false); |
| 262 | } |
| 263 | |
| 264 | for (Element story : doc.getElementsByClass("z-list")) { |
| 265 | MetaData meta = new MetaData(); |
| 266 | meta.setImageDocument(false); |
| 267 | meta.setSource(getType().getSourceName()); |
| 268 | |
| 269 | String subject = mainSubject == null ? "" : mainSubject; |
| 270 | List<String> tagList = new ArrayList<String>(); |
| 271 | |
| 272 | Element stitle = story.getElementsByClass("stitle").first(); |
| 273 | if (stitle != null) { |
| 274 | meta.setTitle(stitle.text()); |
| 275 | meta.setUrl(stitle.absUrl("href")); |
| 276 | Element cover = stitle.getElementsByTag("img").first(); |
| 277 | if (cover != null) { |
| 278 | // note: see data-original if needed? |
| 279 | String coverUrl = cover.absUrl("src"); |
| 280 | |
| 281 | try { |
| 282 | InputStream in = Instance.getCache().open( |
| 283 | new URL(coverUrl), getSupport(), true); |
| 284 | try { |
| 285 | meta.setCover(new Image(in)); |
| 286 | } finally { |
| 287 | in.close(); |
| 288 | } |
| 289 | } catch (Exception e) { |
| 290 | Instance.getTraceHandler() |
| 291 | .error(new Exception( |
| 292 | "Cannot download cover for Fanfiction story in search mode", |
| 293 | e)); |
| 294 | } |
| 295 | } |
| 296 | } |
| 297 | |
| 298 | Elements as = story.getElementsByTag("a"); |
| 299 | if (as.size() > 1) { |
| 300 | meta.setAuthor(as.get(1).text()); |
| 301 | } |
| 302 | |
| 303 | String tags = ""; |
| 304 | |
| 305 | Elements divs = story.getElementsByTag("div"); |
| 306 | if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) { |
| 307 | String resume = divs.get(1).text(); |
| 308 | if (divs.size() > 2) { |
| 309 | tags = divs.get(2).text(); |
| 310 | resume = resume.substring(0, |
| 311 | resume.length() - tags.length()).trim(); |
| 312 | |
| 313 | for (Element d : divs.get(2).getElementsByAttribute( |
| 314 | "data-xutime")) { |
| 315 | String secs = d.attr("data-xutime"); |
| 316 | try { |
| 317 | String date = new SimpleDateFormat("yyyy-MM-dd") |
| 318 | .format(new Date( |
| 319 | Long.parseLong(secs) * 1000)); |
| 320 | // (updated, ) published |
| 321 | if (meta.getDate() != null) { |
| 322 | tagList.add("Updated: " + meta.getDate()); |
| 323 | } |
| 324 | meta.setDate(date); |
| 325 | } catch (Exception e) { |
| 326 | } |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0, |
| 331 | Instance.getTrans().getString(StringId.DESCRIPTION), |
| 332 | resume)); |
| 333 | } |
| 334 | |
| 335 | // How are the tags ordered? |
| 336 | // We have "Rated: xx", then the language, then all other tags |
| 337 | // If the subject(s) is/are present, they are before "Rated: xx" |
| 338 | |
| 339 | // ///////////// |
| 340 | // Examples: // |
| 341 | // ///////////// |
| 342 | |
| 343 | // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters: |
| 344 | // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.] |
| 345 | |
| 346 | // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters: |
| 347 | // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7, |
| 348 | // Published: |
| 349 | // 4/2] |
| 350 | |
| 351 | // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance, |
| 352 | // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1, |
| 353 | // Published: |
| 354 | // 9/1/2016] |
| 355 | |
| 356 | boolean rated = false; |
| 357 | boolean isLang = false; |
| 358 | String[] tab = tags.split(" *- *"); |
| 359 | for (int i = 0; i < tab.length; i++) { |
| 360 | String tag = tab[i]; |
| 361 | if (tag.startsWith("Rated: ")) { |
| 362 | rated = true; |
| 363 | } |
| 364 | |
| 365 | if (!rated) { |
| 366 | if (!subject.isEmpty()) { |
| 367 | subject += ", "; |
| 368 | } |
| 369 | subject += tag; |
| 370 | } else if (isLang) { |
| 371 | meta.setLang(tag); |
| 372 | isLang = false; |
| 373 | } else { |
| 374 | if (tag.contains(":")) { |
| 375 | // Handle special tags: |
| 376 | if (tag.startsWith("Words: ")) { |
| 377 | try { |
| 378 | meta.setWords(Long.parseLong(tag |
| 379 | .substring("Words: ".length()) |
| 380 | .replace(",", "").trim())); |
| 381 | } catch (Exception e) { |
| 382 | } |
| 383 | } else if (tag.startsWith("Rated: ")) { |
| 384 | tagList.add(tag); |
| 385 | } |
| 386 | } else { |
| 387 | for (String t : tag.split("/")) { |
| 388 | tagList.add(t); |
| 389 | } |
| 390 | } |
| 391 | |
| 392 | if (tag.startsWith("Rated: ")) { |
| 393 | isLang = true; |
| 394 | } |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | meta.setSubject(subject); |
| 399 | meta.setTags(tagList); |
| 400 | |
| 401 | metas.add(meta); |
| 402 | } |
| 403 | |
| 404 | return metas; |
| 405 | } |
| 406 | } |