Commit | Line | Data |
---|---|---|
158b372d NR |
1 | package be.nikiroo.fanfix.searchable; |
2 | ||
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.supported.SupportType;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.StringUtils;
158b372d NR |
24 | |
25 | /** | |
26 | * A {@link BasicSearchable} for Fanfiction.NET. | |
27 | * | |
28 | * @author niki | |
29 | */ | |
30 | class Fanfiction extends BasicSearchable { | |
76ec935e NR |
31 | static private String BASE_URL = "http://fanfiction.net/"; |
32 | ||
158b372d NR |
/**
 * Build a new {@link Fanfiction} searchable.
 * <p>
 * The given type is simply handed over to the {@link BasicSearchable}
 * constructor.
 *
 * @param type
 *            {@link SupportType#FANFICTION}
 */
public Fanfiction(SupportType type) {
    super(type);
}
42 | ||
43 | @Override | |
44 | public List<SearchableTag> getTags() throws IOException { | |
45 | String storiesName = null; | |
46 | String crossoversName = null; | |
47 | Map<String, String> stories = new HashMap<String, String>(); | |
48 | Map<String, String> crossovers = new HashMap<String, String>(); | |
49 | ||
76ec935e | 50 | Document mainPage = load(BASE_URL, true); |
158b372d NR |
51 | Element menu = mainPage.getElementsByClass("dropdown").first(); |
52 | if (menu != null) { | |
53 | Element ul = menu.getElementsByClass("dropdown-menu").first(); | |
54 | if (ul != null) { | |
55 | Map<String, String> currentList = null; | |
56 | for (Element li : ul.getElementsByTag("li")) { | |
57 | if (li.hasClass("disabled")) { | |
58 | if (storiesName == null) { | |
59 | storiesName = li.text(); | |
60 | currentList = stories; | |
61 | } else { | |
62 | crossoversName = li.text(); | |
63 | currentList = crossovers; | |
64 | } | |
65 | } else if (currentList != null) { | |
66 | Element a = li.getElementsByTag("a").first(); | |
67 | if (a != null) { | |
68 | currentList.put(a.absUrl("href"), a.text()); | |
69 | } | |
70 | } | |
71 | } | |
72 | } | |
73 | } | |
74 | ||
75 | List<SearchableTag> tags = new ArrayList<SearchableTag>(); | |
76 | ||
77 | if (storiesName != null) { | |
76ec935e | 78 | SearchableTag tag = new SearchableTag(null, storiesName, false); |
158b372d | 79 | for (String id : stories.keySet()) { |
b3b9a1cd | 80 | tag.add(new SearchableTag(id, stories.get(id), false, false)); |
158b372d NR |
81 | } |
82 | tags.add(tag); | |
83 | } | |
84 | ||
85 | if (crossoversName != null) { | |
76ec935e | 86 | SearchableTag tag = new SearchableTag(null, crossoversName, false); |
158b372d | 87 | for (String id : crossovers.keySet()) { |
76ec935e | 88 | tag.add(new SearchableTag(id, crossovers.get(id), false, false)); |
158b372d NR |
89 | } |
90 | tags.add(tag); | |
91 | } | |
92 | ||
93 | return tags; | |
94 | } | |
95 | ||
96 | @Override | |
91b82a5c | 97 | public void fillTag(SearchableTag tag) throws IOException { |
158b372d NR |
98 | if (tag.getId() == null || tag.isComplete()) { |
99 | return; | |
100 | } | |
101 | ||
76ec935e | 102 | Document doc = load(tag.getId(), false); |
158b372d NR |
103 | Element list = doc.getElementById("list_output"); |
104 | if (list != null) { | |
105 | Element table = list.getElementsByTag("table").first(); | |
106 | if (table != null) { | |
107 | for (Element div : table.getElementsByTag("div")) { | |
108 | Element a = div.getElementsByTag("a").first(); | |
109 | Element span = div.getElementsByTag("span").first(); | |
110 | ||
111 | if (a != null) { | |
e66c9078 NR |
112 | String subid = a.absUrl("href"); |
113 | boolean crossoverSubtag = subid | |
114 | .contains("/crossovers/"); | |
115 | ||
116 | SearchableTag subtag = new SearchableTag(subid, | |
117 | a.text(), !crossoverSubtag, !crossoverSubtag); | |
118 | ||
158b372d NR |
119 | tag.add(subtag); |
120 | if (span != null) { | |
121 | String nr = span.text(); | |
122 | if (nr.startsWith("(")) { | |
123 | nr = nr.substring(1); | |
124 | } | |
125 | if (nr.endsWith(")")) { | |
126 | nr = nr.substring(0, nr.length() - 1); | |
127 | } | |
128 | nr = nr.trim(); | |
74a43961 NR |
129 | |
130 | // TODO: fix toNumber/fromNumber | |
b3b9a1cd | 131 | nr = nr.replaceAll("\\.[0-9]*", ""); |
74a43961 NR |
132 | |
133 | subtag.setCount(StringUtils.toNumber(nr)); | |
158b372d NR |
134 | } |
135 | } | |
136 | } | |
137 | } | |
138 | } | |
139 | ||
140 | tag.setComplete(true); | |
141 | } | |
142 | ||
143 | @Override | |
8ffc8b73 | 144 | public List<MetaData> search(String search, int page) throws IOException { |
e66c9078 | 145 | String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8"); |
124442f1 NR |
146 | String url = BASE_URL + "search/?ready=1&type=story&keywords=" |
147 | + encoded + "&ppage=" + page; | |
148 | ||
149 | return getStories(url, null, null); | |
150 | } | |
151 | ||
158b372d | 152 | @Override |
e66c9078 NR |
153 | public List<MetaData> search(SearchableTag tag, int page) |
154 | throws IOException { | |
158b372d NR |
155 | List<MetaData> metas = new ArrayList<MetaData>(); |
156 | ||
e66c9078 NR |
157 | String url = tag.getId(); |
158 | if (url != null) { | |
159 | if (page > 1) { | |
160 | int pos = url.indexOf("&p="); | |
161 | if (pos >= 0) { | |
162 | url = url.replaceAll("(.*\\&p=)[0-9]*(.*)", "$1\\" + page | |
163 | + "$2"); | |
164 | } else { | |
165 | url += "&p=" + page; | |
166 | } | |
167 | } | |
74a43961 | 168 | |
e66c9078 NR |
169 | Document doc = load(url, false); |
170 | ||
171 | // Update the pages number if needed | |
b3b9a1cd | 172 | if (tag.getPages() < 0 && tag.isLeaf()) { |
e66c9078 NR |
173 | tag.setPages(getPages(doc)); |
174 | } | |
175 | ||
176 | // Find out the full subjects (including parents) | |
177 | String subjects = ""; | |
178 | for (SearchableTag t = tag; t != null; t = t.getParent()) { | |
179 | if (!subjects.isEmpty()) { | |
180 | subjects += ", "; | |
181 | } | |
182 | subjects += t.getName(); | |
183 | } | |
184 | ||
185 | metas = getStories(url, doc, subjects); | |
186 | } | |
187 | ||
188 | return metas; | |
189 | } | |
190 | ||
81acd363 NR |
191 | @Override |
192 | public int searchPages(String search) throws IOException { | |
193 | String encoded = URLEncoder.encode(search.toLowerCase(), "utf-8"); | |
194 | String url = BASE_URL + "search/?ready=1&type=story&keywords=" | |
195 | + encoded; | |
196 | ||
197 | return getPages(load(url, false)); | |
198 | } | |
199 | ||
200 | @Override | |
201 | public int searchPages(SearchableTag tag) throws IOException { | |
202 | if (tag.isLeaf()) { | |
203 | String url = tag.getId(); | |
204 | return getPages(load(url, false)); | |
205 | } | |
206 | ||
207 | return 0; | |
208 | } | |
209 | ||
e66c9078 NR |
210 | /** |
211 | * Return the number of pages in this stories result listing. | |
212 | * | |
213 | * @param doc | |
214 | * the document | |
215 | * | |
216 | * @return the number of pages or -1 if unknown | |
e66c9078 | 217 | */ |
8ffc8b73 | 218 | private int getPages(Document doc) { |
e66c9078 | 219 | int pages = -1; |
76ec935e | 220 | |
e66c9078 | 221 | if (doc != null) { |
76ec935e NR |
222 | Element center = doc.getElementsByTag("center").first(); |
223 | if (center != null) { | |
76ec935e NR |
224 | for (Element a : center.getElementsByTag("a")) { |
225 | if (a.absUrl("href").contains("&p=")) { | |
226 | int thisLinkPages = -1; | |
227 | try { | |
228 | String[] tab = a.absUrl("href").split("="); | |
229 | tab = tab[tab.length - 1].split("&"); | |
230 | thisLinkPages = Integer | |
231 | .parseInt(tab[tab.length - 1]); | |
232 | } catch (Exception e) { | |
233 | } | |
234 | ||
235 | pages = Math.max(pages, thisLinkPages); | |
236 | } | |
237 | } | |
76ec935e | 238 | } |
e66c9078 NR |
239 | } |
240 | ||
241 | return pages; | |
242 | } | |
243 | ||
244 | /** | |
245 | * Fetch the stories from the given page. | |
246 | * | |
247 | * @param sourceUrl | |
248 | * the url of the document | |
249 | * @param doc | |
250 | * the document to use (if NULL, will be loaded from | |
251 | * <tt>sourceUrl</tt>) | |
252 | * @param mainSubject | |
253 | * the main subject (the anime/book/movie item related to the | |
254 | * stories, like "MLP" or "Doctor Who"), or NULL if none | |
255 | * | |
256 | * @return the stories found in it | |
257 | * | |
258 | * @throws IOException | |
259 | * in case of I/O errors | |
260 | */ | |
261 | private List<MetaData> getStories(String sourceUrl, Document doc, | |
262 | String mainSubject) throws IOException { | |
263 | List<MetaData> metas = new ArrayList<MetaData>(); | |
264 | ||
265 | if (doc == null) { | |
266 | doc = load(sourceUrl, false); | |
267 | } | |
76ec935e | 268 | |
e66c9078 NR |
269 | for (Element story : doc.getElementsByClass("z-list")) { |
270 | MetaData meta = new MetaData(); | |
271 | meta.setImageDocument(false); | |
272 | meta.setSource(getType().getSourceName()); | |
d16065ec NR |
273 | meta.setPublisher(getType().getSourceName()); |
274 | meta.setType(getType().toString()); | |
e66c9078 | 275 | |
596ed3d6 | 276 | // Title, URL, Cover |
e66c9078 NR |
277 | Element stitle = story.getElementsByClass("stitle").first(); |
278 | if (stitle != null) { | |
279 | meta.setTitle(stitle.text()); | |
280 | meta.setUrl(stitle.absUrl("href")); | |
d16065ec | 281 | meta.setUuid(meta.getUrl()); |
e66c9078 NR |
282 | Element cover = stitle.getElementsByTag("img").first(); |
283 | if (cover != null) { | |
284 | // note: see data-original if needed? | |
285 | String coverUrl = cover.absUrl("src"); | |
286 | ||
287 | try { | |
d66deb8d | 288 | InputStream in = Instance.getInstance().getCache().open(new URL(coverUrl), getSupport(), true); |
e66c9078 NR |
289 | try { |
290 | meta.setCover(new Image(in)); | |
291 | } finally { | |
292 | in.close(); | |
293 | } | |
294 | } catch (Exception e) { | |
12443642 | 295 | // Should not happen on Fanfiction.net |
d66deb8d NR |
296 | Instance.getInstance().getTraceHandler().error(new Exception( |
297 | "Cannot download cover for Fanfiction story in search mode: " + meta.getTitle(), e)); | |
158b372d NR |
298 | } |
299 | } | |
e66c9078 | 300 | } |
158b372d | 301 | |
596ed3d6 | 302 | // Author |
e66c9078 NR |
303 | Elements as = story.getElementsByTag("a"); |
304 | if (as.size() > 1) { | |
305 | meta.setAuthor(as.get(1).text()); | |
306 | } | |
158b372d | 307 | |
596ed3d6 | 308 | // Tags (concatenated text), published date, updated date, Resume |
e66c9078 | 309 | String tags = ""; |
596ed3d6 | 310 | List<String> tagList = new ArrayList<String>(); |
e66c9078 NR |
311 | Elements divs = story.getElementsByTag("div"); |
312 | if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) { | |
313 | String resume = divs.get(1).text(); | |
314 | if (divs.size() > 2) { | |
315 | tags = divs.get(2).text(); | |
316 | resume = resume.substring(0, | |
317 | resume.length() - tags.length()).trim(); | |
158b372d | 318 | |
e66c9078 NR |
319 | for (Element d : divs.get(2).getElementsByAttribute( |
320 | "data-xutime")) { | |
321 | String secs = d.attr("data-xutime"); | |
322 | try { | |
323 | String date = new SimpleDateFormat("yyyy-MM-dd") | |
324 | .format(new Date( | |
325 | Long.parseLong(secs) * 1000)); | |
326 | // (updated, ) published | |
327 | if (meta.getDate() != null) { | |
328 | tagList.add("Updated: " + meta.getDate()); | |
329 | } | |
330 | meta.setDate(date); | |
331 | } catch (Exception e) { | |
332 | } | |
158b372d NR |
333 | } |
334 | } | |
335 | ||
e66c9078 | 336 | meta.setResume(getSupport().makeChapter(new URL(sourceUrl), 0, |
d66deb8d | 337 | Instance.getInstance().getTrans().getString(StringId.DESCRIPTION), resume)); |
158b372d | 338 | } |
158b372d | 339 | |
e66c9078 NR |
340 | // How are the tags ordered? |
341 | // We have "Rated: xx", then the language, then all other tags | |
342 | // If the subject(s) is/are present, they are before "Rated: xx" | |
343 | ||
596ed3d6 | 344 | // //////////// |
e66c9078 | 345 | // Examples: // |
596ed3d6 | 346 | // //////////// |
e66c9078 NR |
347 | |
348 | // Search (Luna) Tags: [Harry Potter, Rated: T, English, Chapters: | |
349 | // 1, Words: 270, Reviews: 2, Published: 2/19/2013, Luna L.] | |
350 | ||
351 | // Normal (MLP) Tags: [Rated: T, Spanish, Drama/Suspense, Chapters: | |
352 | // 2, Words: 8,686, Reviews: 1, Favs: 1, Follows: 1, Updated: 4/7, | |
596ed3d6 | 353 | // Published: 4/2] |
e66c9078 NR |
354 | |
355 | // Crossover (MLP/Who) Tags: [Rated: K+, English, Adventure/Romance, | |
356 | // Chapters: 8, Words: 7,788, Reviews: 2, Favs: 2, Follows: 1, | |
596ed3d6 | 357 | // Published: 9/1/2016] |
e66c9078 NR |
358 | |
359 | boolean rated = false; | |
360 | boolean isLang = false; | |
596ed3d6 | 361 | String subject = mainSubject == null ? "" : mainSubject; |
e66c9078 NR |
362 | String[] tab = tags.split(" *- *"); |
363 | for (int i = 0; i < tab.length; i++) { | |
364 | String tag = tab[i]; | |
365 | if (tag.startsWith("Rated: ")) { | |
366 | rated = true; | |
367 | } | |
158b372d | 368 | |
e66c9078 NR |
369 | if (!rated) { |
370 | if (!subject.isEmpty()) { | |
371 | subject += ", "; | |
372 | } | |
373 | subject += tag; | |
374 | } else if (isLang) { | |
375 | meta.setLang(tag); | |
376 | isLang = false; | |
377 | } else { | |
378 | if (tag.contains(":")) { | |
379 | // Handle special tags: | |
380 | if (tag.startsWith("Words: ")) { | |
381 | try { | |
382 | meta.setWords(Long.parseLong(tag | |
383 | .substring("Words: ".length()) | |
384 | .replace(",", "").trim())); | |
385 | } catch (Exception e) { | |
386 | } | |
387 | } else if (tag.startsWith("Rated: ")) { | |
388 | tagList.add(tag); | |
389 | } | |
390 | } else { | |
596ed3d6 | 391 | // Normal tags are "/"-separated |
e66c9078 NR |
392 | for (String t : tag.split("/")) { |
393 | tagList.add(t); | |
394 | } | |
395 | } | |
158b372d | 396 | |
e66c9078 NR |
397 | if (tag.startsWith("Rated: ")) { |
398 | isLang = true; | |
399 | } | |
400 | } | |
401 | } | |
158b372d | 402 | |
e66c9078 NR |
403 | meta.setSubject(subject); |
404 | meta.setTags(tagList); | |
158b372d | 405 | |
e66c9078 NR |
406 | metas.add(meta); |
407 | } | |
158b372d | 408 | |
e66c9078 | 409 | return metas; |
158b372d NR |
410 | } |
411 | } |