Fix epub compatibility + cover image ext
[fanfix.git] / src / be / nikiroo / fanfix / supported / MangaFox.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.MalformedURLException;
6import java.net.URL;
7import java.util.ArrayList;
8import java.util.Collections;
9import java.util.List;
10import java.util.Map.Entry;
11import java.util.Scanner;
12
13import be.nikiroo.fanfix.Instance;
68686a37 14import be.nikiroo.fanfix.data.MetaData;
16a81ef7 15import be.nikiroo.utils.Image;
ed08c171 16import be.nikiroo.utils.Progress;
08fe2e33
NR
17import be.nikiroo.utils.StringUtils;
18
19class MangaFox extends BasicSupport {
20 @Override
21 protected boolean isHtml() {
22 return true;
23 }
24
25 @Override
26 public String getSourceName() {
d3c15421 27 return "MangaFox.me";
08fe2e33
NR
28 }
29
30 @Override
68686a37
NR
31 protected MetaData getMeta(URL source, InputStream in) throws IOException {
32 MetaData meta = new MetaData();
33
34 meta.setTitle(getTitle(reset(in)));
35 meta.setAuthor(getAuthor(reset(in)));
36 meta.setDate(getDate(reset(in)));
37 meta.setTags(getTags(reset(in)));
38 meta.setSource(getSourceName());
2206ef66 39 meta.setUrl(source.toString());
68686a37
NR
40 meta.setPublisher(getSourceName());
41 meta.setUuid(source.toString());
42 meta.setLuid("");
43 meta.setLang("EN");
44 meta.setSubject("manga");
45 meta.setType(getType().toString());
46 meta.setImageDocument(true);
47 meta.setCover(getCover(reset(in)));
48
49 return meta;
08fe2e33
NR
50 }
51
68686a37 52 private List<String> getTags(InputStream in) {
08fe2e33
NR
53 List<String> tags = new ArrayList<String>();
54
55 String line = getLine(in, "/genres/", 0);
56 if (line != null) {
57 line = StringUtils.unhtml(line);
58 String[] tab = line.split(",");
59 if (tab != null) {
60 for (String tag : tab) {
61 tags.add(tag.trim());
62 }
63 }
64 }
65
66 return tags;
67 }
68
68686a37 69 private String getTitle(InputStream in) {
08fe2e33
NR
70 String line = getLine(in, " property=\"og:title\"", 0);
71 if (line != null) {
72 int pos = -1;
73 for (int i = 0; i < 3; i++) {
74 pos = line.indexOf('"', pos + 1);
75 }
76
77 if (pos >= 0) {
78 line = line.substring(pos + 1);
79 pos = line.indexOf('"');
80 if (pos >= 0) {
81 return line.substring(0, pos);
82 }
83 }
84 }
85
86 return null;
87 }
88
68686a37 89 private String getAuthor(InputStream in) {
08fe2e33
NR
90 List<String> authors = new ArrayList<String>();
91
92 String line = getLine(in, "/author/", 0, false);
93 if (line != null) {
94 for (String ln : StringUtils.unhtml(line).split(",")) {
95 if (ln != null && !ln.trim().isEmpty()
96 && !authors.contains(ln.trim())) {
97 authors.add(ln.trim());
98 }
99 }
100 }
101
102 try {
103 in.reset();
104 } catch (IOException e) {
62c63b07 105 Instance.getTraceHandler().error(e);
08fe2e33
NR
106 }
107
108 line = getLine(in, "/artist/", 0, false);
109 if (line != null) {
110 for (String ln : StringUtils.unhtml(line).split(",")) {
111 if (ln != null && !ln.trim().isEmpty()
112 && !authors.contains(ln.trim())) {
113 authors.add(ln.trim());
114 }
115 }
116 }
117
118 if (authors.isEmpty()) {
119 return null;
211f7ddb 120 }
08fe2e33 121
211f7ddb
NR
122 StringBuilder builder = new StringBuilder();
123 for (String author : authors) {
124 if (builder.length() > 0) {
125 builder.append(", ");
08fe2e33
NR
126 }
127
211f7ddb 128 builder.append(author);
08fe2e33 129 }
211f7ddb
NR
130
131 return builder.toString();
08fe2e33
NR
132 }
133
68686a37 134 private String getDate(InputStream in) {
08fe2e33
NR
135 String line = getLine(in, "/released/", 0);
136 if (line != null) {
137 line = StringUtils.unhtml(line);
138 return line.trim();
139 }
140
141 return null;
142 }
143
144 @Override
145 protected String getDesc(URL source, InputStream in) {
146 String line = getLine(in, " property=\"og:description\"", 0);
147 if (line != null) {
148 int pos = -1;
149 for (int i = 0; i < 3; i++) {
150 pos = line.indexOf('"', pos + 1);
151 }
152
153 if (pos >= 0) {
154 line = line.substring(pos + 1);
155 pos = line.indexOf('"');
156 if (pos >= 0) {
157 return line.substring(0, pos);
158 }
159 }
160 }
161
162 return null;
163 }
164
16a81ef7 165 private Image getCover(InputStream in) {
08fe2e33
NR
166 String line = getLine(in, " property=\"og:image\"", 0);
167 String cover = null;
168 if (line != null) {
169 int pos = -1;
170 for (int i = 0; i < 3; i++) {
171 pos = line.indexOf('"', pos + 1);
172 }
173
174 if (pos >= 0) {
175 line = line.substring(pos + 1);
176 pos = line.indexOf('"');
177 if (pos >= 0) {
178 cover = line.substring(0, pos);
179 }
180 }
181 }
182
183 if (cover != null) {
68686a37 184 InputStream coverIn;
08fe2e33 185 try {
68686a37
NR
186 coverIn = openEx(cover);
187 try {
16a81ef7 188 return new Image(coverIn);
68686a37
NR
189 } finally {
190 coverIn.close();
191 }
192 } catch (IOException e) {
08fe2e33
NR
193 }
194 }
195
196 return null;
197 }
198
199 @Override
ed08c171
NR
200 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
201 Progress pg) {
08fe2e33
NR
202 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
203
204 String volumeAt = "<h3 class=\"volume\">";
205 String linkAt = "href=\"http://mangafox.me/";
206 String endAt = "<script type=\"text/javascript\">";
207
208 boolean started = false;
209
210 @SuppressWarnings("resource")
211 Scanner scan = new Scanner(in, "UTF-8");
212 scan.useDelimiter("\\n");
213 while (scan.hasNext()) {
214 String line = scan.next();
215
216 if (started && line.contains(endAt)) {
217 break;
218 } else if (!started && line.contains(volumeAt)) {
219 started = true;
220 }
221
222 if (started && line.contains(linkAt)) {
223 // Chapter content url
224 String url = null;
225 int pos = line.indexOf("href=\"");
226 if (pos >= 0) {
227 line = line.substring(pos + "href=\"".length());
228 pos = line.indexOf('\"');
229 if (pos >= 0) {
230 url = line.substring(0, pos);
231 }
232 }
233
234 // Chapter name
235 String name = null;
236 if (scan.hasNext()) {
237 name = StringUtils.unhtml(scan.next()).trim();
238 // Remove the "new" tag if present
239 if (name.endsWith("new")) {
240 name = name.substring(0, name.length() - 3).trim();
241 }
242 }
243
08fe2e33
NR
244 try {
245 final String key = name;
246 final URL value = new URL(url);
247 urls.add(new Entry<String, URL>() {
211f7ddb 248 @Override
08fe2e33
NR
249 public URL setValue(URL value) {
250 return null;
251 }
252
211f7ddb 253 @Override
08fe2e33
NR
254 public String getKey() {
255 return key;
256 }
257
211f7ddb 258 @Override
08fe2e33
NR
259 public URL getValue() {
260 return value;
261 }
262 });
263 } catch (MalformedURLException e) {
62c63b07 264 Instance.getTraceHandler().error(e);
08fe2e33
NR
265 }
266 }
267 }
268
ed08c171
NR
269 if (pg == null) {
270 pg = new Progress(0, urls.size());
271 } else {
272 pg.setMinMax(0, urls.size());
273 }
274
275 int i = 1;
276 for (Entry<String, URL> entry : urls) {
277 // to help with the retry and the originalUrl
278 refresh(entry.getValue().toString());
279 pg.setProgress(i++);
280 }
281
08fe2e33
NR
282 // the chapters are in reversed order
283 Collections.reverse(urls);
284
285 return urls;
286 }
287
288 @Override
ed08c171
NR
289 protected String getChapterContent(URL source, InputStream in, int number,
290 Progress pg) {
291 if (pg == null) {
292 pg = new Progress();
293 } else {
294 // Since we have no idea how many images we have, we cycle from 0
295 // to max, then again, then again...
296 pg.setMinMax(0, 20);
297 }
298
08fe2e33
NR
299 StringBuilder builder = new StringBuilder();
300 String base = getCurrentReferer().toString();
301 int pos = base.lastIndexOf('/');
302 base = base.substring(0, pos + 1); // including the '/' at the end
303
ed08c171 304 int i = 1;
08fe2e33
NR
305 boolean close = false;
306 while (in != null) {
307 String linkNextLine = getLine(in, "return enlarge()", 0);
308 try {
309 in.reset();
310 } catch (IOException e) {
62c63b07 311 Instance.getTraceHandler().error(e);
08fe2e33
NR
312 }
313
314 String linkImageLine = getLine(in, "return enlarge()", 1);
315 String linkNext = null;
316 String linkImage = null;
317 pos = linkNextLine.indexOf("href=\"");
318 if (pos >= 0) {
319 linkNextLine = linkNextLine.substring(pos + "href=\"".length());
320 pos = linkNextLine.indexOf('\"');
321 if (pos >= 0) {
322 linkNext = linkNextLine.substring(0, pos);
323 }
324 }
325 pos = linkImageLine.indexOf("src=\"");
326 if (pos >= 0) {
327 linkImageLine = linkImageLine
328 .substring(pos + "src=\"".length());
329 pos = linkImageLine.indexOf('\"');
330 if (pos >= 0) {
331 linkImage = linkImageLine.substring(0, pos);
332 }
333 }
334
335 if (linkImage != null) {
336 builder.append("[");
337 // to help with the retry and the originalUrl, part 1
338 builder.append(withoutQuery(linkImage));
406447a4 339 builder.append("]<br/>");
08fe2e33
NR
340 }
341
342 // to help with the retry and the originalUrl, part 2
343 refresh(linkImage);
ed08c171 344 pg.setProgress((i++) % pg.getMax());
08fe2e33
NR
345
346 if (close) {
347 try {
348 in.close();
349 } catch (IOException e) {
62c63b07 350 Instance.getTraceHandler().error(e);
08fe2e33
NR
351 }
352 }
353
354 in = null;
355 if (linkNext != null && !"javascript:void(0);".equals(linkNext)) {
356 URL url;
357 try {
358 url = new URL(base + linkNext);
359 in = openEx(base + linkNext);
360 setCurrentReferer(url);
ed08c171 361 pg.setProgress((i++) % pg.getMax());
08fe2e33 362 } catch (IOException e) {
16a81ef7
NR
363 Instance.getTraceHandler().error(
364 new IOException(
365 "Cannot get the next manga page which is: "
366 + linkNext, e));
08fe2e33
NR
367 }
368 }
369
370 close = true;
371 }
372
373 setCurrentReferer(source);
374 return builder.toString();
375 }
376
377 @Override
378 protected boolean supports(URL url) {
379 return "mangafox.me".equals(url.getHost())
380 || "www.mangafox.me".equals(url.getHost());
381 }
382
383 /**
384 * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
385 *
386 * @param url
387 * the URL to refresh
388 *
389 * @return TRUE if it was refreshed
390 */
391 private boolean refresh(String url) {
392 try {
393 openEx(url).close();
394 return true;
395 } catch (Exception e) {
396 return false;
397 }
398 }
399
400 /**
401 * Open the URL through the cache, but: retry a second time after 100ms if
402 * it fails, remove the query part of the {@link URL} before saving it to
403 * the cache (so it can be recalled later).
404 *
405 * @param url
406 * the {@link URL}
407 *
408 * @return the resource
409 *
410 * @throws IOException
411 * in case of I/O error
412 */
413 private InputStream openEx(String url) throws IOException {
414 try {
415 return Instance.getCache().open(new URL(url), this, true,
416 withoutQuery(url));
417 } catch (Exception e) {
418 // second chance
419 try {
420 Thread.sleep(100);
421 } catch (InterruptedException ee) {
422 }
423
424 return Instance.getCache().open(new URL(url), this, true,
425 withoutQuery(url));
426 }
427 }
428
429 /**
430 * Return the same input {@link URL} but without the query part.
431 *
432 * @param url
433 * the inpiut {@link URL} as a {@link String}
434 *
435 * @return the input {@link URL} without query
436 */
437 private URL withoutQuery(String url) {
438 URL o = null;
439 try {
440 // Remove the query from o (originalUrl), so it can be cached
441 // correctly
442 o = new URL(url);
443 o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
444
445 return o;
446 } catch (MalformedURLException e) {
447 return null;
448 }
449 }
450}