Version 1.2.3: new supported type: HTML
[fanfix.git] / src / be / nikiroo / fanfix / supported / MangaFox.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.net.MalformedURLException;
7 import java.net.URL;
8 import java.util.ArrayList;
9 import java.util.Collections;
10 import java.util.List;
11 import java.util.Map.Entry;
12 import java.util.Scanner;
13
14 import be.nikiroo.fanfix.Instance;
15 import be.nikiroo.fanfix.data.MetaData;
16 import be.nikiroo.utils.IOUtils;
17 import be.nikiroo.utils.StringUtils;
18
19 class MangaFox extends BasicSupport {
20 @Override
21 protected boolean isHtml() {
22 return true;
23 }
24
25 @Override
26 public String getSourceName() {
27 return "MangaFox.me";
28 }
29
30 @Override
31 protected MetaData getMeta(URL source, InputStream in) throws IOException {
32 MetaData meta = new MetaData();
33
34 meta.setTitle(getTitle(reset(in)));
35 meta.setAuthor(getAuthor(reset(in)));
36 meta.setDate(getDate(reset(in)));
37 meta.setTags(getTags(reset(in)));
38 meta.setSource(getSourceName());
39 meta.setUrl(source.toString());
40 meta.setPublisher(getSourceName());
41 meta.setUuid(source.toString());
42 meta.setLuid("");
43 meta.setLang("EN");
44 meta.setSubject("manga");
45 meta.setType(getType().toString());
46 meta.setImageDocument(true);
47 meta.setCover(getCover(reset(in)));
48
49 return meta;
50 }
51
52 private List<String> getTags(InputStream in) {
53 List<String> tags = new ArrayList<String>();
54
55 String line = getLine(in, "/genres/", 0);
56 if (line != null) {
57 line = StringUtils.unhtml(line);
58 String[] tab = line.split(",");
59 if (tab != null) {
60 for (String tag : tab) {
61 tags.add(tag.trim());
62 }
63 }
64 }
65
66 return tags;
67 }
68
69 private String getTitle(InputStream in) {
70 String line = getLine(in, " property=\"og:title\"", 0);
71 if (line != null) {
72 int pos = -1;
73 for (int i = 0; i < 3; i++) {
74 pos = line.indexOf('"', pos + 1);
75 }
76
77 if (pos >= 0) {
78 line = line.substring(pos + 1);
79 pos = line.indexOf('"');
80 if (pos >= 0) {
81 return line.substring(0, pos);
82 }
83 }
84 }
85
86 return null;
87 }
88
89 private String getAuthor(InputStream in) {
90 List<String> authors = new ArrayList<String>();
91
92 String line = getLine(in, "/author/", 0, false);
93 if (line != null) {
94 for (String ln : StringUtils.unhtml(line).split(",")) {
95 if (ln != null && !ln.trim().isEmpty()
96 && !authors.contains(ln.trim())) {
97 authors.add(ln.trim());
98 }
99 }
100 }
101
102 try {
103 in.reset();
104 } catch (IOException e) {
105 Instance.syserr(e);
106 }
107
108 line = getLine(in, "/artist/", 0, false);
109 if (line != null) {
110 for (String ln : StringUtils.unhtml(line).split(",")) {
111 if (ln != null && !ln.trim().isEmpty()
112 && !authors.contains(ln.trim())) {
113 authors.add(ln.trim());
114 }
115 }
116 }
117
118 if (authors.isEmpty()) {
119 return null;
120 } else {
121 StringBuilder builder = new StringBuilder();
122 for (String author : authors) {
123 if (builder.length() > 0) {
124 builder.append(", ");
125 }
126
127 builder.append(author);
128 }
129
130 return builder.toString();
131 }
132 }
133
134 private String getDate(InputStream in) {
135 String line = getLine(in, "/released/", 0);
136 if (line != null) {
137 line = StringUtils.unhtml(line);
138 return line.trim();
139 }
140
141 return null;
142 }
143
144 @Override
145 protected String getDesc(URL source, InputStream in) {
146 String line = getLine(in, " property=\"og:description\"", 0);
147 if (line != null) {
148 int pos = -1;
149 for (int i = 0; i < 3; i++) {
150 pos = line.indexOf('"', pos + 1);
151 }
152
153 if (pos >= 0) {
154 line = line.substring(pos + 1);
155 pos = line.indexOf('"');
156 if (pos >= 0) {
157 return line.substring(0, pos);
158 }
159 }
160 }
161
162 return null;
163 }
164
165 private BufferedImage getCover(InputStream in) {
166 String line = getLine(in, " property=\"og:image\"", 0);
167 String cover = null;
168 if (line != null) {
169 int pos = -1;
170 for (int i = 0; i < 3; i++) {
171 pos = line.indexOf('"', pos + 1);
172 }
173
174 if (pos >= 0) {
175 line = line.substring(pos + 1);
176 pos = line.indexOf('"');
177 if (pos >= 0) {
178 cover = line.substring(0, pos);
179 }
180 }
181 }
182
183 if (cover != null) {
184 InputStream coverIn;
185 try {
186 coverIn = openEx(cover);
187 try {
188 return IOUtils.toImage(coverIn);
189 } finally {
190 coverIn.close();
191 }
192 } catch (IOException e) {
193 }
194 }
195
196 return null;
197 }
198
199 @Override
200 protected List<Entry<String, URL>> getChapters(URL source, InputStream in) {
201 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
202
203 String volumeAt = "<h3 class=\"volume\">";
204 String linkAt = "href=\"http://mangafox.me/";
205 String endAt = "<script type=\"text/javascript\">";
206
207 boolean started = false;
208
209 @SuppressWarnings("resource")
210 Scanner scan = new Scanner(in, "UTF-8");
211 scan.useDelimiter("\\n");
212 while (scan.hasNext()) {
213 String line = scan.next();
214
215 if (started && line.contains(endAt)) {
216 break;
217 } else if (!started && line.contains(volumeAt)) {
218 started = true;
219 }
220
221 if (started && line.contains(linkAt)) {
222 // Chapter content url
223 String url = null;
224 int pos = line.indexOf("href=\"");
225 if (pos >= 0) {
226 line = line.substring(pos + "href=\"".length());
227 pos = line.indexOf('\"');
228 if (pos >= 0) {
229 url = line.substring(0, pos);
230 }
231 }
232
233 // Chapter name
234 String name = null;
235 if (scan.hasNext()) {
236 name = StringUtils.unhtml(scan.next()).trim();
237 // Remove the "new" tag if present
238 if (name.endsWith("new")) {
239 name = name.substring(0, name.length() - 3).trim();
240 }
241 }
242
243 // to help with the retry and the originalUrl
244 refresh(url);
245
246 try {
247 final String key = name;
248 final URL value = new URL(url);
249 urls.add(new Entry<String, URL>() {
250 public URL setValue(URL value) {
251 return null;
252 }
253
254 public String getKey() {
255 return key;
256 }
257
258 public URL getValue() {
259 return value;
260 }
261 });
262 } catch (MalformedURLException e) {
263 Instance.syserr(e);
264 }
265 }
266 }
267
268 // the chapters are in reversed order
269 Collections.reverse(urls);
270
271 return urls;
272 }
273
274 @Override
275 protected String getChapterContent(URL source, InputStream in, int number) {
276 StringBuilder builder = new StringBuilder();
277 String base = getCurrentReferer().toString();
278 int pos = base.lastIndexOf('/');
279 base = base.substring(0, pos + 1); // including the '/' at the end
280
281 boolean close = false;
282 while (in != null) {
283 String linkNextLine = getLine(in, "return enlarge()", 0);
284 try {
285 in.reset();
286 } catch (IOException e) {
287 Instance.syserr(e);
288 }
289
290 String linkImageLine = getLine(in, "return enlarge()", 1);
291 String linkNext = null;
292 String linkImage = null;
293 pos = linkNextLine.indexOf("href=\"");
294 if (pos >= 0) {
295 linkNextLine = linkNextLine.substring(pos + "href=\"".length());
296 pos = linkNextLine.indexOf('\"');
297 if (pos >= 0) {
298 linkNext = linkNextLine.substring(0, pos);
299 }
300 }
301 pos = linkImageLine.indexOf("src=\"");
302 if (pos >= 0) {
303 linkImageLine = linkImageLine
304 .substring(pos + "src=\"".length());
305 pos = linkImageLine.indexOf('\"');
306 if (pos >= 0) {
307 linkImage = linkImageLine.substring(0, pos);
308 }
309 }
310
311 if (linkImage != null) {
312 builder.append("[");
313 // to help with the retry and the originalUrl, part 1
314 builder.append(withoutQuery(linkImage));
315 builder.append("]\n");
316 }
317
318 // to help with the retry and the originalUrl, part 2
319 refresh(linkImage);
320
321 if (close) {
322 try {
323 in.close();
324 } catch (IOException e) {
325 Instance.syserr(e);
326 }
327 }
328
329 in = null;
330 if (linkNext != null && !"javascript:void(0);".equals(linkNext)) {
331 URL url;
332 try {
333 url = new URL(base + linkNext);
334 in = openEx(base + linkNext);
335 setCurrentReferer(url);
336 } catch (IOException e) {
337 Instance.syserr(new IOException(
338 "Cannot get the next manga page which is: "
339 + linkNext, e));
340 }
341 }
342
343 close = true;
344 }
345
346 setCurrentReferer(source);
347 return builder.toString();
348 }
349
350 @Override
351 protected boolean supports(URL url) {
352 return "mangafox.me".equals(url.getHost())
353 || "www.mangafox.me".equals(url.getHost());
354 }
355
356 /**
357 * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
358 *
359 * @param url
360 * the URL to refresh
361 *
362 * @return TRUE if it was refreshed
363 */
364 private boolean refresh(String url) {
365 try {
366 openEx(url).close();
367 return true;
368 } catch (Exception e) {
369 return false;
370 }
371 }
372
373 /**
374 * Open the URL through the cache, but: retry a second time after 100ms if
375 * it fails, remove the query part of the {@link URL} before saving it to
376 * the cache (so it can be recalled later).
377 *
378 * @param url
379 * the {@link URL}
380 *
381 * @return the resource
382 *
383 * @throws IOException
384 * in case of I/O error
385 */
386 private InputStream openEx(String url) throws IOException {
387 try {
388 return Instance.getCache().open(new URL(url), this, true,
389 withoutQuery(url));
390 } catch (Exception e) {
391 // second chance
392 try {
393 Thread.sleep(100);
394 } catch (InterruptedException ee) {
395 }
396
397 return Instance.getCache().open(new URL(url), this, true,
398 withoutQuery(url));
399 }
400 }
401
402 /**
403 * Return the same input {@link URL} but without the query part.
404 *
405 * @param url
406 * the inpiut {@link URL} as a {@link String}
407 *
408 * @return the input {@link URL} without query
409 */
410 private URL withoutQuery(String url) {
411 URL o = null;
412 try {
413 // Remove the query from o (originalUrl), so it can be cached
414 // correctly
415 o = new URL(url);
416 o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
417
418 return o;
419 } catch (MalformedURLException e) {
420 return null;
421 }
422 }
423 }