362b5433d7d3fc81e163a03a08273e9f2319f03f
[fanfix.git] / src / be / nikiroo / fanfix / searchable / Fanfiction.java
1 package be.nikiroo.fanfix.searchable;
2
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.net.URL;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.supported.SupportType;
import be.nikiroo.utils.Image;
23
24 /**
25 * A {@link BasicSearchable} for Fanfiction.NET.
26 *
27 * @author niki
28 */
29 class Fanfiction extends BasicSearchable {
30 static private String BASE_URL = "http://fanfiction.net/";
31
32 /**
33 * Create a new {@link Fanfiction}.
34 *
35 * @param type
36 * {@link SupportType#FANFICTION}
37 */
38 public Fanfiction(SupportType type) {
39 super(type);
40 }
41
42 @Override
43 public List<SearchableTag> getTags() throws IOException {
44 String storiesName = null;
45 String crossoversName = null;
46 Map<String, String> stories = new HashMap<String, String>();
47 Map<String, String> crossovers = new HashMap<String, String>();
48
49 Document mainPage = load(BASE_URL, true);
50 Element menu = mainPage.getElementsByClass("dropdown").first();
51 if (menu != null) {
52 Element ul = menu.getElementsByClass("dropdown-menu").first();
53 if (ul != null) {
54 Map<String, String> currentList = null;
55 for (Element li : ul.getElementsByTag("li")) {
56 if (li.hasClass("disabled")) {
57 if (storiesName == null) {
58 storiesName = li.text();
59 currentList = stories;
60 } else {
61 crossoversName = li.text();
62 currentList = crossovers;
63 }
64 } else if (currentList != null) {
65 Element a = li.getElementsByTag("a").first();
66 if (a != null) {
67 currentList.put(a.absUrl("href"), a.text());
68 }
69 }
70 }
71 }
72 }
73
74 List<SearchableTag> tags = new ArrayList<SearchableTag>();
75
76 if (storiesName != null) {
77 SearchableTag tag = new SearchableTag(null, storiesName, false);
78 for (String id : stories.keySet()) {
79 tag.add(new SearchableTag(id, stories.get(id), true, false));
80 }
81 tags.add(tag);
82 }
83
84 if (crossoversName != null) {
85 SearchableTag tag = new SearchableTag(null, crossoversName, false);
86 for (String id : crossovers.keySet()) {
87 tag.add(new SearchableTag(id, crossovers.get(id), false, false));
88 }
89 tags.add(tag);
90 }
91
92 return tags;
93 }
94
95 @Override
96 protected void fillTag(SearchableTag tag) throws IOException {
97 if (tag.getId() == null || tag.isComplete()) {
98 return;
99 }
100
101 Document doc = load(tag.getId(), false);
102 Element list = doc.getElementById("list_output");
103 if (list != null) {
104 Element table = list.getElementsByTag("table").first();
105 if (table != null) {
106 for (Element div : table.getElementsByTag("div")) {
107 Element a = div.getElementsByTag("a").first();
108 Element span = div.getElementsByTag("span").first();
109
110 if (a != null) {
111 String subid = a.absUrl("href");
112 boolean crossoverSubtag = subid
113 .contains("/crossovers/");
114
115 SearchableTag subtag = new SearchableTag(subid,
116 a.text(), !crossoverSubtag, !crossoverSubtag);
117
118 tag.add(subtag);
119 if (span != null) {
120 String nr = span.text();
121 if (nr.startsWith("(")) {
122 nr = nr.substring(1);
123 }
124 if (nr.endsWith(")")) {
125 nr = nr.substring(0, nr.length() - 1);
126 }
127 nr = nr.trim();
128
129 long count = 0;
130 try {
131 if (nr.toLowerCase().endsWith("m")) {
132 count = Long.parseLong(nr.substring(0,
133 nr.length() - 1).trim());
134 count *= 1000000;
135 } else if (nr.toLowerCase().endsWith("k")) {
136 count = Long.parseLong(nr.substring(0,
137 nr.length() - 1).trim());
138 count *= 1000;
139 } else {
140 count = Long.parseLong(nr);
141 }
142 } catch (NumberFormatException pe) {
143 }
144
145 subtag.setCount(count);
146 }
147 }
148 }
149 }
150 }
151
152 tag.setComplete(true);
153 }
154
155 @Override
156 public List<MetaData> search(String search) throws IOException {
157 String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8");
158 return getStories(
159 "http://fanfiction.net/search/?ready=1&type=story&keywords="
160 + encoded, null, null);
161 }
162
163 @Override
164 public List<MetaData> search(SearchableTag tag, int page)
165 throws IOException {
166 List<MetaData> metas = new ArrayList<MetaData>();
167
168 String url = tag.getId();
169 if (url != null) {
170 if (page > 1) {
171 int pos = url.indexOf("&p=");
172 if (pos >= 0) {
173 url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page
174 + "$2");
175 } else {
176 url += "&p=" + page;
177 }
178 }
179
180 Document doc = load(url, false);
181
182 // Update the pages number if needed
183 if (tag.getPages() < 0) {
184 tag.setPages(getPages(doc));
185 }
186
187 // Find out the full subjects (including parents)
188 String subjects = "";
189 for (SearchableTag t = tag; t != null; t = t.getParent()) {
190 if (!subjects.isEmpty()) {
191 subjects += ", ";
192 }
193 subjects += t.getName();
194 }
195
196 metas = getStories(url, doc, subjects);
197 }
198
199 return metas;
200 }
201
202 /**
203 * Return the number of pages in this stories result listing.
204 *
205 * @param doc
206 * the document
207 *
208 * @return the number of pages or -1 if unknown
209 *
210 * @throws IOException
211 * in case of I/O errors
212 */
213 private int getPages(Document doc) throws IOException {
214 int pages = -1;
215
216 if (doc != null) {
217 Element center = doc.getElementsByTag("center").first();
218 if (center != null) {
219 for (Element a : center.getElementsByTag("a")) {
220 if (a.absUrl("href").contains("&p=")) {
221 int thisLinkPages = -1;
222 try {
223 String[] tab = a.absUrl("href").split("=");
224 tab = tab[tab.length - 1].split("&");
225 thisLinkPages = Integer
226 .parseInt(tab[tab.length - 1]);
227 } catch (Exception e) {
228 }
229
230 pages = Math.max(pages, thisLinkPages);
231 }
232 }
233 }
234 }
235
236 return pages;
237 }
238
239 /**
240 * Fetch the stories from the given page.
241 *
242 * @param sourceUrl
243 * the url of the document
244 * @param doc
245 * the document to use (if NULL, will be loaded from
246 * <tt>sourceUrl</tt>)
247 * @param mainSubject
248 * the main subject (the anime/book/movie item related to the
249 * stories, like "MLP" or "Doctor Who"), or NULL if none
250 *
251 * @return the stories found in it
252 *
253 * @throws IOException
254 * in case of I/O errors
255 */
256 private List<MetaData> getStories(String sourceUrl, Document doc,
257 String mainSubject) throws IOException {
258 List<MetaData> metas = new ArrayList<MetaData>();
259
260 if (doc == null) {
261 doc = load(sourceUrl, false);
262 }
263
264 for (Element story : doc.getElementsByClass("z-list")) {
265 MetaData meta = new MetaData();
266 meta.setImageDocument(false);
267 meta.setSource(getType().getSourceName());
268
269 String subject = mainSubject == null ? "" : mainSubject;
270 List<String> tagList = new ArrayList<String>();
271
272 Element stitle = story.getElementsByClass("stitle").first();
273 if (stitle != null) {
274 meta.setTitle(stitle.text());
275 meta.setUrl(stitle.absUrl("href"));
276 Element cover = stitle.getElementsByTag("img").first();
277 if (cover != null) {
278 // note: see data-original if needed?
279 String coverUrl = cover.absUrl("src");
280
281 try {
282 InputStream in = Instance.getCache().open(
283 new URL(coverUrl), getSupport(), true);
284 try {
285 meta.setCover(new Image(in));
286 } finally {
287 in.close();
288 }
289 } catch (Exception e) {
290 Instance.getTraceHandler()
291 .error(new Exception(
292 "Cannot download cover for Fanfiction story in search mode",
293 e));
294 }
295 }
296 }
297
298 Elements as = story.getElementsByTag("a");
299 if (as.size() > 1) {
300 meta.setAuthor(as.get(1).text());
301 }
302
303 String tags = "";
304
305 Elements divs = story.getElementsByTag("div");
306 if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
307 String resume = divs.get(1).text();
308 if (divs.size() > 2) {
309 tags = divs.get(2).text();
310 resume = resume.substring(0,
311 resume.length() - tags.length()).trim();
312
313 for (Element d : divs.get(2).getElementsByAttribute(
314 "data-xutime")) {
315 String secs = d.attr("data-xutime");
316 try {
317 String date = new SimpleDateFormat("yyyy-MM-dd")
318 .format(new Date(
319 Long.parseLong(secs) * 1000));
320 // (updated, ) published
321 if (meta.getDate() != null) {
322 tagList.add("Updated: " + meta.getDate());
323 }
324 meta.setDate(date);
325 } catch (Exception e) {
326 }
327 }
328 }
329
330 meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0,
331 Instance.getTrans().getString(StringId.DESCRIPTION),
332 resume));
333 }
334
335 // How are the tags ordered?
336 // We have "Rated: xx", then the language, then all other tags
337 // If the subject(s) is/are present, they are before "Rated: xx"
338
339 // /////////////
340 // Examples: //
341 // /////////////
342
343 // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters:
344 // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.]
345
346 // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters:
347 // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7,
348 // Published:
349 // 4/2]
350
351 // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance,
352 // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1,
353 // Published:
354 // 9/1/2016]
355
356 boolean rated = false;
357 boolean isLang = false;
358 String[] tab = tags.split(" *- *");
359 for (int i = 0; i < tab.length; i++) {
360 String tag = tab[i];
361 if (tag.startsWith("Rated: ")) {
362 rated = true;
363 }
364
365 if (!rated) {
366 if (!subject.isEmpty()) {
367 subject += ", ";
368 }
369 subject += tag;
370 } else if (isLang) {
371 meta.setLang(tag);
372 isLang = false;
373 } else {
374 if (tag.contains(":")) {
375 // Handle special tags:
376 if (tag.startsWith("Words: ")) {
377 try {
378 meta.setWords(Long.parseLong(tag
379 .substring("Words: ".length())
380 .replace(",", "").trim()));
381 } catch (Exception e) {
382 }
383 } else if (tag.startsWith("Rated: ")) {
384 tagList.add(tag);
385 }
386 } else {
387 for (String t : tag.split("/")) {
388 tagList.add(t);
389 }
390 }
391
392 if (tag.startsWith("Rated: ")) {
393 isLang = true;
394 }
395 }
396 }
397
398 meta.setSubject(subject);
399 meta.setTags(tagList);
400
401 metas.add(meta);
402 }
403
404 return metas;
405 }
406 }