Commit | Line | Data |
---|---|---|
08fe2e33 NR |
1 | package be.nikiroo.fanfix.supported; |
2 | ||
3 | import java.io.IOException; | |
4 | import java.io.InputStream; | |
5 | import java.net.MalformedURLException; | |
6 | import java.net.URL; | |
cb554033 | 7 | import java.util.AbstractMap; |
08fe2e33 NR |
8 | import java.util.ArrayList; |
9 | import java.util.Collections; | |
10 | import java.util.List; | |
11 | import java.util.Map.Entry; | |
41c3bba7 NR |
12 | import java.util.SortedMap; |
13 | import java.util.TreeMap; | |
cb554033 NR |
14 | |
15 | import org.jsoup.helper.DataUtil; | |
16 | import org.jsoup.nodes.Element; | |
17 | import org.jsoup.select.Elements; | |
08fe2e33 NR |
18 | |
19 | import be.nikiroo.fanfix.Instance; | |
68686a37 | 20 | import be.nikiroo.fanfix.data.MetaData; |
16a81ef7 | 21 | import be.nikiroo.utils.Image; |
ed08c171 | 22 | import be.nikiroo.utils.Progress; |
08fe2e33 NR |
23 | import be.nikiroo.utils.StringUtils; |
24 | ||
cb554033 | 25 | class MangaFox extends BasicSupport { |
08fe2e33 NR |
26 | @Override |
27 | protected boolean isHtml() { | |
28 | return true; | |
29 | } | |
30 | ||
08fe2e33 | 31 | @Override |
cb554033 | 32 | protected MetaData getMeta() throws IOException { |
68686a37 | 33 | MetaData meta = new MetaData(); |
cb554033 NR |
34 | Element doc = getSourceNode(); |
35 | ||
36 | Element title = doc.getElementById("title"); | |
37 | Elements table = null; | |
38 | if (title != null) { | |
39 | table = title.getElementsByTag("table"); | |
40 | } | |
41 | if (table != null) { | |
42 | // Rows: header, data | |
43 | Elements rows = table.first().getElementsByTag("tr"); | |
44 | if (rows.size() > 1) { | |
45 | table = rows.get(1).getElementsByTag("td"); | |
46 | // Columns: Realeased, Authors, Artists, Genres | |
47 | if (table.size() < 4) { | |
48 | table = null; | |
49 | } | |
50 | } | |
51 | } | |
68686a37 | 52 | |
cb554033 NR |
53 | meta.setTitle(getTitle()); |
54 | if (table != null) { | |
55 | meta.setAuthor(getAuthors(table.get(1).text() + "," | |
56 | + table.get(2).text())); | |
57 | ||
58 | meta.setDate(StringUtils.unhtml(table.get(0).text()).trim()); | |
59 | meta.setTags(explode(table.get(3).text())); | |
60 | } | |
727108fe | 61 | meta.setSource(getType().getSourceName()); |
cb554033 | 62 | meta.setUrl(getSource().toString()); |
727108fe | 63 | meta.setPublisher(getType().getSourceName()); |
cb554033 | 64 | meta.setUuid(getSource().toString()); |
68686a37 | 65 | meta.setLuid(""); |
276f95c6 | 66 | meta.setLang("en"); |
68686a37 NR |
67 | meta.setSubject("manga"); |
68 | meta.setType(getType().toString()); | |
69 | meta.setImageDocument(true); | |
cb554033 | 70 | meta.setCover(getCover()); |
68686a37 NR |
71 | |
72 | return meta; | |
08fe2e33 NR |
73 | } |
74 | ||
cb554033 NR |
75 | private String getTitle() { |
76 | Element doc = getSourceNode(); | |
08fe2e33 | 77 | |
cb554033 NR |
78 | Element title = doc.getElementById("title"); |
79 | Element h1 = title.getElementsByTag("h1").first(); | |
80 | if (h1 != null) { | |
81 | return StringUtils.unhtml(h1.text()).trim(); | |
08fe2e33 NR |
82 | } |
83 | ||
84 | return null; | |
85 | } | |
86 | ||
cb554033 NR |
87 | private String getAuthors(String authorList) { |
88 | String author = ""; | |
89 | for (String auth : explode(authorList)) { | |
90 | if (!author.isEmpty()) { | |
91 | author = author + ", "; | |
08fe2e33 | 92 | } |
cb554033 | 93 | author += auth; |
08fe2e33 NR |
94 | } |
95 | ||
cb554033 | 96 | return author; |
08fe2e33 NR |
97 | } |
98 | ||
99 | @Override | |
cb554033 NR |
100 | protected String getDesc() { |
101 | Element doc = getSourceNode(); | |
102 | Element title = doc.getElementsByClass("summary").first(); | |
103 | if (title != null) { | |
af1f506f | 104 | return StringUtils.unhtml(title.text()).trim(); |
08fe2e33 NR |
105 | } |
106 | ||
107 | return null; | |
108 | } | |
109 | ||
cb554033 NR |
110 | private Image getCover() { |
111 | Element doc = getSourceNode(); | |
112 | Element cover = doc.getElementsByClass("cover").first(); | |
113 | if (cover != null) { | |
114 | cover = cover.getElementsByTag("img").first(); | |
08fe2e33 NR |
115 | } |
116 | ||
117 | if (cover != null) { | |
cb554033 NR |
118 | String coverUrl = cover.absUrl("src"); |
119 | ||
68686a37 | 120 | InputStream coverIn; |
08fe2e33 | 121 | try { |
cb554033 | 122 | coverIn = openEx(coverUrl); |
68686a37 | 123 | try { |
16a81ef7 | 124 | return new Image(coverIn); |
68686a37 NR |
125 | } finally { |
126 | coverIn.close(); | |
127 | } | |
128 | } catch (IOException e) { | |
cb554033 | 129 | Instance.getTraceHandler().error(e); |
08fe2e33 NR |
130 | } |
131 | } | |
132 | ||
133 | return null; | |
134 | } | |
135 | ||
136 | @Override | |
cb554033 | 137 | protected List<Entry<String, URL>> getChapters(Progress pg) { |
08fe2e33 NR |
138 | List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>(); |
139 | ||
41c3bba7 NR |
140 | String prefix = null; // each chapter starts with this prefix, then a |
141 | // chapter number (including "x.5"), then name | |
142 | ||
cb554033 NR |
143 | Element doc = getSourceNode(); |
144 | for (Element li : doc.getElementsByTag("li")) { | |
145 | Element el = li.getElementsByTag("h4").first(); | |
146 | if (el == null) { | |
147 | el = li.getElementsByTag("h3").first(); | |
08fe2e33 | 148 | } |
cb554033 NR |
149 | if (el != null) { |
150 | Element a = el.getElementsByTag("a").first(); | |
151 | if (a != null) { | |
152 | String title = StringUtils.unhtml(el.text()).trim(); | |
153 | try { | |
154 | String url = a.absUrl("href"); | |
155 | if (url.endsWith("1.html")) { | |
156 | url = url.substring(0, | |
157 | url.length() - "1.html".length()); | |
08fe2e33 | 158 | } |
cb554033 NR |
159 | if (!url.endsWith("/")) { |
160 | url += "/"; | |
08fe2e33 NR |
161 | } |
162 | ||
41c3bba7 NR |
163 | if (prefix == null || !prefix.isEmpty()) { |
164 | StringBuilder possiblePrefix = new StringBuilder( | |
165 | StringUtils.unhtml(a.text()).trim()); | |
166 | while (possiblePrefix.length() > 0) { | |
167 | char car = possiblePrefix.charAt(possiblePrefix | |
168 | .length() - 1); | |
169 | boolean punctuation = (car == '.' || car == ' '); | |
170 | boolean digit = (car >= '0' && car <= '9'); | |
171 | if (!punctuation && !digit) { | |
172 | break; | |
173 | } | |
174 | ||
175 | possiblePrefix.setLength(possiblePrefix | |
176 | .length() - 1); | |
177 | } | |
178 | ||
179 | if (prefix == null) { | |
180 | prefix = possiblePrefix.toString(); | |
181 | } | |
182 | ||
183 | if (!prefix.equalsIgnoreCase(possiblePrefix | |
184 | .toString())) { | |
185 | prefix = ""; // prefix not ok | |
186 | } | |
187 | } | |
188 | ||
cb554033 NR |
189 | urls.add(new AbstractMap.SimpleEntry<String, URL>( |
190 | title, new URL(url))); | |
191 | } catch (Exception e) { | |
192 | Instance.getTraceHandler().error(e); | |
193 | } | |
08fe2e33 NR |
194 | } |
195 | } | |
196 | } | |
197 | ||
41c3bba7 NR |
198 | if (prefix != null && !prefix.isEmpty()) { |
199 | try { | |
200 | // We found a prefix, so everything should be sortable | |
201 | SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>(); | |
202 | for (Entry<String, URL> entry : urls) { | |
203 | String num = entry.getKey().substring(prefix.length() + 1) | |
204 | .trim(); | |
205 | String name = ""; | |
206 | int pos = num.indexOf(' '); | |
207 | if (pos >= 0) { | |
208 | name = num.substring(pos).trim(); | |
209 | num = num.substring(0, pos).trim(); | |
210 | } | |
211 | ||
212 | if (!name.isEmpty()) { | |
213 | name = "Tome " + num + ": " + name; | |
214 | } else { | |
215 | name = "Tome " + num; | |
216 | } | |
217 | ||
218 | double key = Double.parseDouble(num); | |
219 | ||
220 | map.put(key, new AbstractMap.SimpleEntry<String, URL>(name, | |
221 | entry.getValue())); | |
222 | } | |
223 | urls = new ArrayList<Entry<String, URL>>(map.values()); | |
224 | } catch (NumberFormatException e) { | |
225 | Instance.getTraceHandler() | |
226 | .error(new IOException( | |
227 | "Cannot find a tome number, revert to default sorting", | |
228 | e)); | |
229 | // by default, the chapters are in reversed order | |
230 | Collections.reverse(urls); | |
231 | } | |
232 | } else { | |
233 | // by default, the chapters are in reversed order | |
234 | Collections.reverse(urls); | |
235 | } | |
08fe2e33 NR |
236 | |
237 | return urls; | |
238 | } | |
239 | ||
240 | @Override | |
cb554033 NR |
241 | protected String getChapterContent(URL chapUrl, int number, Progress pg) |
242 | throws IOException { | |
ed08c171 NR |
243 | if (pg == null) { |
244 | pg = new Progress(); | |
ed08c171 NR |
245 | } |
246 | ||
08fe2e33 | 247 | StringBuilder builder = new StringBuilder(); |
08fe2e33 | 248 | |
cb554033 NR |
249 | String url = chapUrl.toString(); |
250 | InputStream imageIn = null; | |
251 | Element imageDoc = null; | |
252 | ||
253 | // 1. find out how many images there are | |
254 | int size; | |
255 | try { | |
256 | // note: when used, the base URL can be an ad-page | |
257 | imageIn = openEx(url + "1.html"); | |
258 | imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html"); | |
87a7a0a5 NR |
259 | } catch (IOException e) { |
260 | Instance.getTraceHandler().error( | |
261 | new IOException("Cannot get image " + 1 + " of manga", e)); | |
cb554033 | 262 | } finally { |
87a7a0a5 NR |
263 | if (imageIn != null) { |
264 | imageIn.close(); | |
265 | } | |
cb554033 NR |
266 | } |
267 | Element select = imageDoc.getElementsByClass("m").first(); | |
268 | Elements options = select.getElementsByTag("option"); | |
269 | size = options.size() - 1; // last is "Comments" | |
270 | ||
271 | pg.setMinMax(0, size); | |
272 | ||
273 | // 2. list them | |
274 | for (int i = 1; i <= size; i++) { | |
87a7a0a5 | 275 | if (i > 1) { // because first one was opened for size |
cb554033 NR |
276 | try { |
277 | imageIn = openEx(url + i + ".html"); | |
278 | imageDoc = DataUtil.load(imageIn, "UTF-8", url + i | |
279 | + ".html"); | |
87a7a0a5 NR |
280 | |
281 | String linkImage = imageDoc.getElementById("image").absUrl( | |
282 | "src"); | |
283 | if (linkImage != null) { | |
284 | builder.append("["); | |
285 | // to help with the retry and the originalUrl, part 1 | |
286 | builder.append(withoutQuery(linkImage)); | |
287 | builder.append("]<br/>"); | |
288 | } | |
289 | ||
290 | // to help with the retry and the originalUrl, part 2 | |
291 | refresh(linkImage); | |
292 | } catch (IOException e) { | |
293 | Instance.getTraceHandler().error( | |
294 | new IOException("Cannot get image " + i | |
295 | + " of manga", e)); | |
cb554033 | 296 | } finally { |
87a7a0a5 NR |
297 | if (imageIn != null) { |
298 | imageIn.close(); | |
299 | } | |
08fe2e33 NR |
300 | } |
301 | } | |
08fe2e33 NR |
302 | } |
303 | ||
08fe2e33 NR |
304 | return builder.toString(); |
305 | } | |
306 | ||
08fe2e33 | 307 | /** |
41c3bba7 | 308 | * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}. |
08fe2e33 NR |
309 | * |
310 | * @param url | |
311 | * the URL to refresh | |
312 | * | |
313 | * @return TRUE if it was refreshed | |
314 | */ | |
315 | private boolean refresh(String url) { | |
316 | try { | |
317 | openEx(url).close(); | |
318 | return true; | |
319 | } catch (Exception e) { | |
320 | return false; | |
321 | } | |
322 | } | |
323 | ||
324 | /** | |
325 | * Open the URL through the cache, but: retry a second time after 100ms if | |
326 | * it fails, remove the query part of the {@link URL} before saving it to | |
327 | * the cache (so it can be recalled later). | |
328 | * | |
329 | * @param url | |
330 | * the {@link URL} | |
331 | * | |
332 | * @return the resource | |
333 | * | |
334 | * @throws IOException | |
335 | * in case of I/O error | |
336 | */ | |
337 | private InputStream openEx(String url) throws IOException { | |
338 | try { | |
339 | return Instance.getCache().open(new URL(url), this, true, | |
340 | withoutQuery(url)); | |
341 | } catch (Exception e) { | |
342 | // second chance | |
343 | try { | |
344 | Thread.sleep(100); | |
345 | } catch (InterruptedException ee) { | |
346 | } | |
347 | ||
348 | return Instance.getCache().open(new URL(url), this, true, | |
349 | withoutQuery(url)); | |
350 | } | |
351 | } | |
352 | ||
353 | /** | |
354 | * Return the same input {@link URL} but without the query part. | |
355 | * | |
356 | * @param url | |
357 | * the inpiut {@link URL} as a {@link String} | |
358 | * | |
359 | * @return the input {@link URL} without query | |
360 | */ | |
361 | private URL withoutQuery(String url) { | |
362 | URL o = null; | |
363 | try { | |
364 | // Remove the query from o (originalUrl), so it can be cached | |
365 | // correctly | |
366 | o = new URL(url); | |
367 | o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath()); | |
368 | ||
369 | return o; | |
370 | } catch (MalformedURLException e) { | |
371 | return null; | |
372 | } | |
373 | } | |
cb554033 NR |
374 | |
375 | /** | |
376 | * Explode an HTML comma-separated list of values into a non-duplicate text | |
377 | * {@link List} . | |
378 | * | |
379 | * @param values | |
380 | * the comma-separated values in HTML format | |
381 | * | |
382 | * @return the full list with no duplicate in text format | |
383 | */ | |
384 | private List<String> explode(String values) { | |
385 | List<String> list = new ArrayList<String>(); | |
386 | if (values != null && !values.isEmpty()) { | |
387 | for (String auth : values.split(",")) { | |
388 | String a = StringUtils.unhtml(auth).trim(); | |
389 | if (!a.isEmpty() && !list.contains(a.trim())) { | |
390 | list.add(a); | |
391 | } | |
392 | } | |
393 | } | |
394 | ||
395 | return list; | |
396 | } | |
397 | ||
398 | @Override | |
399 | protected boolean supports(URL url) { | |
400 | return "mangafox.me".equals(url.getHost()) | |
401 | || "www.mangafox.me".equals(url.getHost()) | |
402 | || "fanfox.net".equals(url.getHost()) | |
403 | || "www.fanfox.net".equals(url.getHost()); | |
404 | } | |
08fe2e33 | 405 | } |