CLI search, step 1
[fanfix.git] / src / be / nikiroo / fanfix / searchable / Fanfiction.java
1 package be.nikiroo.fanfix.searchable;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.net.URLEncoder;
7 import java.text.SimpleDateFormat;
8 import java.util.ArrayList;
9 import java.util.Date;
10 import java.util.HashMap;
11 import java.util.List;
12 import java.util.Map;
13
14 import org.jsoup.nodes.Document;
15 import org.jsoup.nodes.Element;
16 import org.jsoup.select.Elements;
17
18 import be.nikiroo.fanfix.Instance;
19 import be.nikiroo.fanfix.bundles.StringId;
20 import be.nikiroo.fanfix.data.MetaData;
21 import be.nikiroo.fanfix.supported.SupportType;
22 import be.nikiroo.utils.Image;
23 import be.nikiroo.utils.StringUtils;
24
25 /**
26 * A {@link BasicSearchable} for Fanfiction.NET.
27 *
28 * @author niki
29 */
30 class Fanfiction extends BasicSearchable {
31 static private String BASE_URL = "http://fanfiction.net/";
32
33 /**
34 * Create a new {@link Fanfiction}.
35 *
36 * @param type
37 * {@link SupportType#FANFICTION}
38 */
39 public Fanfiction(SupportType type) {
40 super(type);
41 }
42
43 @Override
44 public List<SearchableTag> getTags() throws IOException {
45 String storiesName = null;
46 String crossoversName = null;
47 Map<String, String> stories = new HashMap<String, String>();
48 Map<String, String> crossovers = new HashMap<String, String>();
49
50 Document mainPage = load(BASE_URL, true);
51 Element menu = mainPage.getElementsByClass("dropdown").first();
52 if (menu != null) {
53 Element ul = menu.getElementsByClass("dropdown-menu").first();
54 if (ul != null) {
55 Map<String, String> currentList = null;
56 for (Element li : ul.getElementsByTag("li")) {
57 if (li.hasClass("disabled")) {
58 if (storiesName == null) {
59 storiesName = li.text();
60 currentList = stories;
61 } else {
62 crossoversName = li.text();
63 currentList = crossovers;
64 }
65 } else if (currentList != null) {
66 Element a = li.getElementsByTag("a").first();
67 if (a != null) {
68 currentList.put(a.absUrl("href"), a.text());
69 }
70 }
71 }
72 }
73 }
74
75 List<SearchableTag> tags = new ArrayList<SearchableTag>();
76
77 if (storiesName != null) {
78 SearchableTag tag = new SearchableTag(null, storiesName, false);
79 for (String id : stories.keySet()) {
80 tag.add(new SearchableTag(id, stories.get(id), true, false));
81 }
82 tags.add(tag);
83 }
84
85 if (crossoversName != null) {
86 SearchableTag tag = new SearchableTag(null, crossoversName, false);
87 for (String id : crossovers.keySet()) {
88 tag.add(new SearchableTag(id, crossovers.get(id), false, false));
89 }
90 tags.add(tag);
91 }
92
93 return tags;
94 }
95
96 @Override
97 public void fillTag(SearchableTag tag) throws IOException {
98 if (tag.getId() == null || tag.isComplete()) {
99 return;
100 }
101
102 Document doc = load(tag.getId(), false);
103 Element list = doc.getElementById("list_output");
104 if (list != null) {
105 Element table = list.getElementsByTag("table").first();
106 if (table != null) {
107 for (Element div : table.getElementsByTag("div")) {
108 Element a = div.getElementsByTag("a").first();
109 Element span = div.getElementsByTag("span").first();
110
111 if (a != null) {
112 String subid = a.absUrl("href");
113 boolean crossoverSubtag = subid
114 .contains("/crossovers/");
115
116 SearchableTag subtag = new SearchableTag(subid,
117 a.text(), !crossoverSubtag, !crossoverSubtag);
118
119 tag.add(subtag);
120 if (span != null) {
121 String nr = span.text();
122 if (nr.startsWith("(")) {
123 nr = nr.substring(1);
124 }
125 if (nr.endsWith(")")) {
126 nr = nr.substring(0, nr.length() - 1);
127 }
128 nr = nr.trim();
129 subtag.setCount(toNumber(nr));
130 }
131 }
132 }
133 }
134 }
135
136 tag.setComplete(true);
137 }
138
139 /**
140 * @deprecated use {@link StringUtils} when updated
141 */
142 @Deprecated
143 private static long toNumber(String value) {
144 // TODO: use StringUtils instead after update
145 long count = 0l;
146 if (value != null) {
147 try {
148 if (value.toLowerCase().endsWith("m")) {
149 count = Long.parseLong(value.substring(0,
150 value.length() - 1).trim());
151 count *= 1000000;
152 } else if (value.toLowerCase().endsWith("k")) {
153 count = Long.parseLong(value.substring(0,
154 value.length() - 1).trim());
155 count *= 1000;
156 } else {
157 count = Long.parseLong(value);
158 }
159 } catch (NumberFormatException pe) {
160 }
161 }
162
163 return count;
164 }
165
166 @Override
167 public List<MetaData> search(String search) throws IOException {
168 String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8");
169 return getStories(BASE_URL + "search/?ready=1&type=story&keywords="
170 + encoded, null, null);
171 }
172
173 @Override
174 public List<MetaData> search(SearchableTag tag, int page)
175 throws IOException {
176 List<MetaData> metas = new ArrayList<MetaData>();
177
178 String url = tag.getId();
179 if (url != null) {
180 if (page > 1) {
181 int pos = url.indexOf("&p=");
182 if (pos >= 0) {
183 url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page
184 + "$2");
185 } else {
186 url += "&p=" + page;
187 }
188 }
189
190 Document doc = load(url, false);
191
192 // Update the pages number if needed
193 if (tag.getPages() < 0) {
194 tag.setPages(getPages(doc));
195 }
196
197 // Find out the full subjects (including parents)
198 String subjects = "";
199 for (SearchableTag t = tag; t != null; t = t.getParent()) {
200 if (!subjects.isEmpty()) {
201 subjects += ", ";
202 }
203 subjects += t.getName();
204 }
205
206 metas = getStories(url, doc, subjects);
207 }
208
209 return metas;
210 }
211
212 /**
213 * Return the number of pages in this stories result listing.
214 *
215 * @param doc
216 * the document
217 *
218 * @return the number of pages or -1 if unknown
219 *
220 * @throws IOException
221 * in case of I/O errors
222 */
223 private int getPages(Document doc) throws IOException {
224 int pages = -1;
225
226 if (doc != null) {
227 Element center = doc.getElementsByTag("center").first();
228 if (center != null) {
229 for (Element a : center.getElementsByTag("a")) {
230 if (a.absUrl("href").contains("&p=")) {
231 int thisLinkPages = -1;
232 try {
233 String[] tab = a.absUrl("href").split("=");
234 tab = tab[tab.length - 1].split("&");
235 thisLinkPages = Integer
236 .parseInt(tab[tab.length - 1]);
237 } catch (Exception e) {
238 }
239
240 pages = Math.max(pages, thisLinkPages);
241 }
242 }
243 }
244 }
245
246 return pages;
247 }
248
249 /**
250 * Fetch the stories from the given page.
251 *
252 * @param sourceUrl
253 * the url of the document
254 * @param doc
255 * the document to use (if NULL, will be loaded from
256 * <tt>sourceUrl</tt>)
257 * @param mainSubject
258 * the main subject (the anime/book/movie item related to the
259 * stories, like "MLP" or "Doctor Who"), or NULL if none
260 *
261 * @return the stories found in it
262 *
263 * @throws IOException
264 * in case of I/O errors
265 */
266 private List<MetaData> getStories(String sourceUrl, Document doc,
267 String mainSubject) throws IOException {
268 List<MetaData> metas = new ArrayList<MetaData>();
269
270 if (doc == null) {
271 doc = load(sourceUrl, false);
272 }
273
274 for (Element story : doc.getElementsByClass("z-list")) {
275 MetaData meta = new MetaData();
276 meta.setImageDocument(false);
277 meta.setSource(getType().getSourceName());
278
279 // Title, URL, Cover
280 Element stitle = story.getElementsByClass("stitle").first();
281 if (stitle != null) {
282 meta.setTitle(stitle.text());
283 meta.setUrl(stitle.absUrl("href"));
284 Element cover = stitle.getElementsByTag("img").first();
285 if (cover != null) {
286 // note: see data-original if needed?
287 String coverUrl = cover.absUrl("src");
288
289 try {
290 InputStream in = Instance.getCache().open(
291 new URL(coverUrl), getSupport(), true);
292 try {
293 meta.setCover(new Image(in));
294 } finally {
295 in.close();
296 }
297 } catch (Exception e) {
298 Instance.getTraceHandler()
299 .error(new Exception(
300 "Cannot download cover for Fanfiction story in search mode",
301 e));
302 }
303 }
304 }
305
306 // Author
307 Elements as = story.getElementsByTag("a");
308 if (as.size() > 1) {
309 meta.setAuthor(as.get(1).text());
310 }
311
312 // Tags (concatenated text), published date, updated date, Resume
313 String tags = "";
314 List<String> tagList = new ArrayList<String>();
315 Elements divs = story.getElementsByTag("div");
316 if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
317 String resume = divs.get(1).text();
318 if (divs.size() > 2) {
319 tags = divs.get(2).text();
320 resume = resume.substring(0,
321 resume.length() - tags.length()).trim();
322
323 for (Element d : divs.get(2).getElementsByAttribute(
324 "data-xutime")) {
325 String secs = d.attr("data-xutime");
326 try {
327 String date = new SimpleDateFormat("yyyy-MM-dd")
328 .format(new Date(
329 Long.parseLong(secs) * 1000));
330 // (updated, ) published
331 if (meta.getDate() != null) {
332 tagList.add("Updated: " + meta.getDate());
333 }
334 meta.setDate(date);
335 } catch (Exception e) {
336 }
337 }
338 }
339
340 meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0,
341 Instance.getTrans().getString(StringId.DESCRIPTION),
342 resume));
343 }
344
345 // How are the tags ordered?
346 // We have "Rated: xx", then the language, then all other tags
347 // If the subject(s) is/are present, they are before "Rated: xx"
348
349 // ////////////
350 // Examples: //
351 // ////////////
352
353 // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters:
354 // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.]
355
356 // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters:
357 // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7,
358 // Published: 4/2]
359
360 // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance,
361 // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1,
362 // Published: 9/1/2016]
363
364 boolean rated = false;
365 boolean isLang = false;
366 String subject = mainSubject == null ? "" : mainSubject;
367 String[] tab = tags.split(" *- *");
368 for (int i = 0; i < tab.length; i++) {
369 String tag = tab[i];
370 if (tag.startsWith("Rated: ")) {
371 rated = true;
372 }
373
374 if (!rated) {
375 if (!subject.isEmpty()) {
376 subject += ", ";
377 }
378 subject += tag;
379 } else if (isLang) {
380 meta.setLang(tag);
381 isLang = false;
382 } else {
383 if (tag.contains(":")) {
384 // Handle special tags:
385 if (tag.startsWith("Words: ")) {
386 try {
387 meta.setWords(Long.parseLong(tag
388 .substring("Words: ".length())
389 .replace(",", "").trim()));
390 } catch (Exception e) {
391 }
392 } else if (tag.startsWith("Rated: ")) {
393 tagList.add(tag);
394 }
395 } else {
396 // Normal tags are "/"-separated
397 for (String t : tag.split("/")) {
398 tagList.add(t);
399 }
400 }
401
402 if (tag.startsWith("Rated: ")) {
403 isLang = true;
404 }
405 }
406 }
407
408 meta.setSubject(subject);
409 meta.setTags(tagList);
410
411 metas.add(meta);
412 }
413
414 return metas;
415 }
416 }