Merge branch 'master' into search
[fanfix.git] / src / be / nikiroo / fanfix / searchable / Fanfiction.java
CommitLineData
158b372d
NR
1package be.nikiroo.fanfix.searchable;
2
3import java.io.IOException;
e66c9078 4import java.io.InputStream;
158b372d 5import java.net.URL;
e66c9078
NR
6import java.net.URLEncoder;
7import java.text.SimpleDateFormat;
158b372d 8import java.util.ArrayList;
e66c9078 9import java.util.Date;
158b372d
NR
10import java.util.HashMap;
11import java.util.List;
12import java.util.Map;
13
14import org.jsoup.nodes.Document;
15import org.jsoup.nodes.Element;
16import org.jsoup.select.Elements;
17
18import be.nikiroo.fanfix.Instance;
19import be.nikiroo.fanfix.bundles.StringId;
20import be.nikiroo.fanfix.data.MetaData;
21import be.nikiroo.fanfix.supported.SupportType;
e66c9078 22import be.nikiroo.utils.Image;
596ed3d6 23import be.nikiroo.utils.StringUtils;
158b372d
NR
24
25/**
26 * A {@link BasicSearchable} for Fanfiction.NET.
27 *
28 * @author niki
29 */
30class Fanfiction extends BasicSearchable {
76ec935e
NR
31 static private String BASE_URL = "http://fanfiction.net/";
32
158b372d
NR
/**
 * Build the {@link Fanfiction} searchable for the given support type.
 * 
 * @param type
 *            the support type handed over to the parent class, expected to
 *            be {@link SupportType#FANFICTION}
 */
public Fanfiction(SupportType type) {
    super(type);
}
42
43 @Override
44 public List<SearchableTag> getTags() throws IOException {
45 String storiesName = null;
46 String crossoversName = null;
47 Map<String, String> stories = new HashMap<String, String>();
48 Map<String, String> crossovers = new HashMap<String, String>();
49
76ec935e 50 Document mainPage = load(BASE_URL, true);
158b372d
NR
51 Element menu = mainPage.getElementsByClass("dropdown").first();
52 if (menu != null) {
53 Element ul = menu.getElementsByClass("dropdown-menu").first();
54 if (ul != null) {
55 Map<String, String> currentList = null;
56 for (Element li : ul.getElementsByTag("li")) {
57 if (li.hasClass("disabled")) {
58 if (storiesName == null) {
59 storiesName = li.text();
60 currentList = stories;
61 } else {
62 crossoversName = li.text();
63 currentList = crossovers;
64 }
65 } else if (currentList != null) {
66 Element a = li.getElementsByTag("a").first();
67 if (a != null) {
68 currentList.put(a.absUrl("href"), a.text());
69 }
70 }
71 }
72 }
73 }
74
75 List<SearchableTag> tags = new ArrayList<SearchableTag>();
76
77 if (storiesName != null) {
76ec935e 78 SearchableTag tag = new SearchableTag(null, storiesName, false);
158b372d 79 for (String id : stories.keySet()) {
b3b9a1cd 80 tag.add(new SearchableTag(id, stories.get(id), false, false));
158b372d
NR
81 }
82 tags.add(tag);
83 }
84
85 if (crossoversName != null) {
76ec935e 86 SearchableTag tag = new SearchableTag(null, crossoversName, false);
158b372d 87 for (String id : crossovers.keySet()) {
76ec935e 88 tag.add(new SearchableTag(id, crossovers.get(id), false, false));
158b372d
NR
89 }
90 tags.add(tag);
91 }
92
93 return tags;
94 }
95
96 @Override
91b82a5c 97 public void fillTag(SearchableTag tag) throws IOException {
158b372d
NR
98 if (tag.getId() == null || tag.isComplete()) {
99 return;
100 }
101
76ec935e 102 Document doc = load(tag.getId(), false);
158b372d
NR
103 Element list = doc.getElementById("list_output");
104 if (list != null) {
105 Element table = list.getElementsByTag("table").first();
106 if (table != null) {
107 for (Element div : table.getElementsByTag("div")) {
108 Element a = div.getElementsByTag("a").first();
109 Element span = div.getElementsByTag("span").first();
110
111 if (a != null) {
e66c9078
NR
112 String subid = a.absUrl("href");
113 boolean crossoverSubtag = subid
114 .contains("/crossovers/");
115
116 SearchableTag subtag = new SearchableTag(subid,
117 a.text(), !crossoverSubtag, !crossoverSubtag);
118
158b372d
NR
119 tag.add(subtag);
120 if (span != null) {
121 String nr = span.text();
122 if (nr.startsWith("(")) {
123 nr = nr.substring(1);
124 }
125 if (nr.endsWith(")")) {
126 nr = nr.substring(0, nr.length() - 1);
127 }
128 nr = nr.trim();
b3b9a1cd
NR
129
130 //TODO: fix toNumber/fromNumber
131 nr = nr.replaceAll("\\.[0-9]*", "");
132
596ed3d6 133 subtag.setCount(toNumber(nr));
158b372d
NR
134 }
135 }
136 }
137 }
138 }
139
140 tag.setComplete(true);
141 }
142
596ed3d6
NR
143 /**
144 * @deprecated use {@link StringUtils} when updated
145 */
146 @Deprecated
147 private static long toNumber(String value) {
148 // TODO: use StringUtils instead after update
149 long count = 0l;
150 if (value != null) {
151 try {
152 if (value.toLowerCase().endsWith("m")) {
153 count = Long.parseLong(value.substring(0,
154 value.length() - 1).trim());
155 count *= 1000000;
156 } else if (value.toLowerCase().endsWith("k")) {
157 count = Long.parseLong(value.substring(0,
158 value.length() - 1).trim());
159 count *= 1000;
160 } else {
161 count = Long.parseLong(value);
162 }
163 } catch (NumberFormatException pe) {
164 }
165 }
166
167 return count;
168 }
169
158b372d
NR
170 @Override
171 public List<MetaData> search(String search) throws IOException {
e66c9078 172 String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8");
596ed3d6
NR
173 return getStories(BASE_URL + "search/?ready=1&type=story&keywords="
174 + encoded, null, null);
158b372d
NR
175 }
176
177 @Override
e66c9078
NR
178 public List<MetaData> search(SearchableTag tag, int page)
179 throws IOException {
158b372d
NR
180 List<MetaData> metas = new ArrayList<MetaData>();
181
e66c9078
NR
182 String url = tag.getId();
183 if (url != null) {
184 if (page > 1) {
185 int pos = url.indexOf("&p=");
186 if (pos >= 0) {
187 url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page
188 + "$2");
189 } else {
190 url += "&p=" + page;
191 }
192 }
b3b9a1cd 193
e66c9078
NR
194 Document doc = load(url, false);
195
196 // Update the pages number if needed
b3b9a1cd 197 if (tag.getPages() < 0 && tag.isLeaf()) {
e66c9078
NR
198 tag.setPages(getPages(doc));
199 }
200
201 // Find out the full subjects (including parents)
202 String subjects = "";
203 for (SearchableTag t = tag; t != null; t = t.getParent()) {
204 if (!subjects.isEmpty()) {
205 subjects += ", ";
206 }
207 subjects += t.getName();
208 }
209
210 metas = getStories(url, doc, subjects);
211 }
212
213 return metas;
214 }
215
216 /**
217 * Return the number of pages in this stories result listing.
218 *
219 * @param doc
220 * the document
221 *
222 * @return the number of pages or -1 if unknown
223 *
224 * @throws IOException
225 * in case of I/O errors
226 */
227 private int getPages(Document doc) throws IOException {
228 int pages = -1;
76ec935e 229
e66c9078 230 if (doc != null) {
76ec935e
NR
231 Element center = doc.getElementsByTag("center").first();
232 if (center != null) {
76ec935e
NR
233 for (Element a : center.getElementsByTag("a")) {
234 if (a.absUrl("href").contains("&p=")) {
235 int thisLinkPages = -1;
236 try {
237 String[] tab = a.absUrl("href").split("=");
238 tab = tab[tab.length - 1].split("&");
239 thisLinkPages = Integer
240 .parseInt(tab[tab.length - 1]);
241 } catch (Exception e) {
242 }
243
244 pages = Math.max(pages, thisLinkPages);
245 }
246 }
76ec935e 247 }
e66c9078
NR
248 }
249
250 return pages;
251 }
252
253 /**
254 * Fetch the stories from the given page.
255 *
256 * @param sourceUrl
257 * the url of the document
258 * @param doc
259 * the document to use (if NULL, will be loaded from
260 * <tt>sourceUrl</tt>)
261 * @param mainSubject
262 * the main subject (the anime/book/movie item related to the
263 * stories, like "MLP" or "Doctor Who"), or NULL if none
264 *
265 * @return the stories found in it
266 *
267 * @throws IOException
268 * in case of I/O errors
269 */
270 private List<MetaData> getStories(String sourceUrl, Document doc,
271 String mainSubject) throws IOException {
272 List<MetaData> metas = new ArrayList<MetaData>();
273
274 if (doc == null) {
275 doc = load(sourceUrl, false);
276 }
76ec935e 277
e66c9078
NR
278 for (Element story : doc.getElementsByClass("z-list")) {
279 MetaData meta = new MetaData();
280 meta.setImageDocument(false);
281 meta.setSource(getType().getSourceName());
282
596ed3d6 283 // Title, URL, Cover
e66c9078
NR
284 Element stitle = story.getElementsByClass("stitle").first();
285 if (stitle != null) {
286 meta.setTitle(stitle.text());
287 meta.setUrl(stitle.absUrl("href"));
288 Element cover = stitle.getElementsByTag("img").first();
289 if (cover != null) {
290 // note: see data-original if needed?
291 String coverUrl = cover.absUrl("src");
292
293 try {
294 InputStream in = Instance.getCache().open(
295 new URL(coverUrl), getSupport(), true);
296 try {
297 meta.setCover(new Image(in));
298 } finally {
299 in.close();
300 }
301 } catch (Exception e) {
302 Instance.getTraceHandler()
303 .error(new Exception(
304 "Cannot download cover for Fanfiction story in search mode",
305 e));
158b372d
NR
306 }
307 }
e66c9078 308 }
158b372d 309
596ed3d6 310 // Author
e66c9078
NR
311 Elements as = story.getElementsByTag("a");
312 if (as.size() > 1) {
313 meta.setAuthor(as.get(1).text());
314 }
158b372d 315
596ed3d6 316 // Tags (concatenated text), published date, updated date, Resume
e66c9078 317 String tags = "";
596ed3d6 318 List<String> tagList = new ArrayList<String>();
e66c9078
NR
319 Elements divs = story.getElementsByTag("div");
320 if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
321 String resume = divs.get(1).text();
322 if (divs.size() > 2) {
323 tags = divs.get(2).text();
324 resume = resume.substring(0,
325 resume.length() - tags.length()).trim();
158b372d 326
e66c9078
NR
327 for (Element d : divs.get(2).getElementsByAttribute(
328 "data-xutime")) {
329 String secs = d.attr("data-xutime");
330 try {
331 String date = new SimpleDateFormat("yyyy-MM-dd")
332 .format(new Date(
333 Long.parseLong(secs) * 1000));
334 // (updated, ) published
335 if (meta.getDate() != null) {
336 tagList.add("Updated: " + meta.getDate());
337 }
338 meta.setDate(date);
339 } catch (Exception e) {
340 }
158b372d
NR
341 }
342 }
343
e66c9078
NR
344 meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0,
345 Instance.getTrans().getString(StringId.DESCRIPTION),
158b372d 346 resume));
158b372d 347 }
158b372d 348
e66c9078
NR
349 // How are the tags ordered?
350 // We have "Rated: xx", then the language, then all other tags
351 // If the subject(s) is/are present, they are before "Rated: xx"
352
596ed3d6 353 // ////////////
e66c9078 354 // Examples: //
596ed3d6 355 // ////////////
e66c9078
NR
356
357 // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters:
358 // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.]
359
360 // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters:
361 // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7,
596ed3d6 362 // Published: 4/2]
e66c9078
NR
363
364 // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance,
365 // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1,
596ed3d6 366 // Published: 9/1/2016]
e66c9078
NR
367
368 boolean rated = false;
369 boolean isLang = false;
596ed3d6 370 String subject = mainSubject == null ? "" : mainSubject;
e66c9078
NR
371 String[] tab = tags.split(" *- *");
372 for (int i = 0; i < tab.length; i++) {
373 String tag = tab[i];
374 if (tag.startsWith("Rated: ")) {
375 rated = true;
376 }
158b372d 377
e66c9078
NR
378 if (!rated) {
379 if (!subject.isEmpty()) {
380 subject += ", ";
381 }
382 subject += tag;
383 } else if (isLang) {
384 meta.setLang(tag);
385 isLang = false;
386 } else {
387 if (tag.contains(":")) {
388 // Handle special tags:
389 if (tag.startsWith("Words: ")) {
390 try {
391 meta.setWords(Long.parseLong(tag
392 .substring("Words: ".length())
393 .replace(",", "").trim()));
394 } catch (Exception e) {
395 }
396 } else if (tag.startsWith("Rated: ")) {
397 tagList.add(tag);
398 }
399 } else {
596ed3d6 400 // Normal tags are "/"-separated
e66c9078
NR
401 for (String t : tag.split("/")) {
402 tagList.add(t);
403 }
404 }
158b372d 405
e66c9078
NR
406 if (tag.startsWith("Rated: ")) {
407 isLang = true;
408 }
409 }
410 }
158b372d 411
e66c9078
NR
412 meta.setSubject(subject);
413 meta.setTags(tagList);
158b372d 414
e66c9078
NR
415 metas.add(meta);
416 }
158b372d 417
e66c9078 418 return metas;
158b372d
NR
419 }
420}