New: MangaLel support (manga, FR)
[fanfix.git] / src / be / nikiroo / fanfix / supported / MangaFox.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.MalformedURLException;
6import java.net.URL;
cb554033 7import java.util.AbstractMap;
08fe2e33
NR
8import java.util.ArrayList;
9import java.util.Collections;
10import java.util.List;
11import java.util.Map.Entry;
41c3bba7
NR
12import java.util.SortedMap;
13import java.util.TreeMap;
cb554033
NR
14
15import org.jsoup.helper.DataUtil;
16import org.jsoup.nodes.Element;
17import org.jsoup.select.Elements;
08fe2e33
NR
18
19import be.nikiroo.fanfix.Instance;
68686a37 20import be.nikiroo.fanfix.data.MetaData;
16a81ef7 21import be.nikiroo.utils.Image;
ed08c171 22import be.nikiroo.utils.Progress;
08fe2e33
NR
23import be.nikiroo.utils.StringUtils;
24
cb554033 25class MangaFox extends BasicSupport {
08fe2e33
NR
26 @Override
27 protected boolean isHtml() {
28 return true;
29 }
30
31 @Override
32 public String getSourceName() {
d3c15421 33 return "MangaFox.me";
08fe2e33
NR
34 }
35
36 @Override
cb554033 37 protected MetaData getMeta() throws IOException {
68686a37 38 MetaData meta = new MetaData();
cb554033
NR
39 Element doc = getSourceNode();
40
41 Element title = doc.getElementById("title");
42 Elements table = null;
43 if (title != null) {
44 table = title.getElementsByTag("table");
45 }
46 if (table != null) {
47 // Rows: header, data
48 Elements rows = table.first().getElementsByTag("tr");
49 if (rows.size() > 1) {
50 table = rows.get(1).getElementsByTag("td");
51 // Columns: Realeased, Authors, Artists, Genres
52 if (table.size() < 4) {
53 table = null;
54 }
55 }
56 }
68686a37 57
cb554033
NR
58 meta.setTitle(getTitle());
59 if (table != null) {
60 meta.setAuthor(getAuthors(table.get(1).text() + ","
61 + table.get(2).text()));
62
63 meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
64 meta.setTags(explode(table.get(3).text()));
65 }
68686a37 66 meta.setSource(getSourceName());
cb554033 67 meta.setUrl(getSource().toString());
68686a37 68 meta.setPublisher(getSourceName());
cb554033 69 meta.setUuid(getSource().toString());
68686a37 70 meta.setLuid("");
276f95c6 71 meta.setLang("en");
68686a37
NR
72 meta.setSubject("manga");
73 meta.setType(getType().toString());
74 meta.setImageDocument(true);
cb554033 75 meta.setCover(getCover());
68686a37
NR
76
77 return meta;
08fe2e33
NR
78 }
79
cb554033
NR
80 private String getTitle() {
81 Element doc = getSourceNode();
08fe2e33 82
cb554033
NR
83 Element title = doc.getElementById("title");
84 Element h1 = title.getElementsByTag("h1").first();
85 if (h1 != null) {
86 return StringUtils.unhtml(h1.text()).trim();
08fe2e33
NR
87 }
88
89 return null;
90 }
91
cb554033
NR
92 private String getAuthors(String authorList) {
93 String author = "";
94 for (String auth : explode(authorList)) {
95 if (!author.isEmpty()) {
96 author = author + ", ";
08fe2e33 97 }
cb554033 98 author += auth;
08fe2e33
NR
99 }
100
cb554033 101 return author;
08fe2e33
NR
102 }
103
104 @Override
cb554033
NR
105 protected String getDesc() {
106 Element doc = getSourceNode();
107 Element title = doc.getElementsByClass("summary").first();
108 if (title != null) {
af1f506f 109 return StringUtils.unhtml(title.text()).trim();
08fe2e33
NR
110 }
111
112 return null;
113 }
114
cb554033
NR
115 private Image getCover() {
116 Element doc = getSourceNode();
117 Element cover = doc.getElementsByClass("cover").first();
118 if (cover != null) {
119 cover = cover.getElementsByTag("img").first();
08fe2e33
NR
120 }
121
122 if (cover != null) {
cb554033
NR
123 String coverUrl = cover.absUrl("src");
124
68686a37 125 InputStream coverIn;
08fe2e33 126 try {
cb554033 127 coverIn = openEx(coverUrl);
68686a37 128 try {
16a81ef7 129 return new Image(coverIn);
68686a37
NR
130 } finally {
131 coverIn.close();
132 }
133 } catch (IOException e) {
cb554033 134 Instance.getTraceHandler().error(e);
08fe2e33
NR
135 }
136 }
137
138 return null;
139 }
140
141 @Override
cb554033 142 protected List<Entry<String, URL>> getChapters(Progress pg) {
08fe2e33
NR
143 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
144
41c3bba7
NR
145 String prefix = null; // each chapter starts with this prefix, then a
146 // chapter number (including "x.5"), then name
147
cb554033
NR
148 Element doc = getSourceNode();
149 for (Element li : doc.getElementsByTag("li")) {
150 Element el = li.getElementsByTag("h4").first();
151 if (el == null) {
152 el = li.getElementsByTag("h3").first();
08fe2e33 153 }
cb554033
NR
154 if (el != null) {
155 Element a = el.getElementsByTag("a").first();
156 if (a != null) {
157 String title = StringUtils.unhtml(el.text()).trim();
158 try {
159 String url = a.absUrl("href");
160 if (url.endsWith("1.html")) {
161 url = url.substring(0,
162 url.length() - "1.html".length());
08fe2e33 163 }
cb554033
NR
164 if (!url.endsWith("/")) {
165 url += "/";
08fe2e33
NR
166 }
167
41c3bba7
NR
168 if (prefix == null || !prefix.isEmpty()) {
169 StringBuilder possiblePrefix = new StringBuilder(
170 StringUtils.unhtml(a.text()).trim());
171 while (possiblePrefix.length() > 0) {
172 char car = possiblePrefix.charAt(possiblePrefix
173 .length() - 1);
174 boolean punctuation = (car == '.' || car == ' ');
175 boolean digit = (car >= '0' && car <= '9');
176 if (!punctuation && !digit) {
177 break;
178 }
179
180 possiblePrefix.setLength(possiblePrefix
181 .length() - 1);
182 }
183
184 if (prefix == null) {
185 prefix = possiblePrefix.toString();
186 }
187
188 if (!prefix.equalsIgnoreCase(possiblePrefix
189 .toString())) {
190 prefix = ""; // prefix not ok
191 }
192 }
193
cb554033
NR
194 urls.add(new AbstractMap.SimpleEntry<String, URL>(
195 title, new URL(url)));
196 } catch (Exception e) {
197 Instance.getTraceHandler().error(e);
198 }
08fe2e33
NR
199 }
200 }
201 }
202
41c3bba7
NR
203 if (prefix != null && !prefix.isEmpty()) {
204 try {
205 // We found a prefix, so everything should be sortable
206 SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
207 for (Entry<String, URL> entry : urls) {
208 String num = entry.getKey().substring(prefix.length() + 1)
209 .trim();
210 String name = "";
211 int pos = num.indexOf(' ');
212 if (pos >= 0) {
213 name = num.substring(pos).trim();
214 num = num.substring(0, pos).trim();
215 }
216
217 if (!name.isEmpty()) {
218 name = "Tome " + num + ": " + name;
219 } else {
220 name = "Tome " + num;
221 }
222
223 double key = Double.parseDouble(num);
224
225 map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
226 entry.getValue()));
227 }
228 urls = new ArrayList<Entry<String, URL>>(map.values());
229 } catch (NumberFormatException e) {
230 Instance.getTraceHandler()
231 .error(new IOException(
232 "Cannot find a tome number, revert to default sorting",
233 e));
234 // by default, the chapters are in reversed order
235 Collections.reverse(urls);
236 }
237 } else {
238 // by default, the chapters are in reversed order
239 Collections.reverse(urls);
240 }
08fe2e33
NR
241
242 return urls;
243 }
244
245 @Override
cb554033
NR
246 protected String getChapterContent(URL chapUrl, int number, Progress pg)
247 throws IOException {
ed08c171
NR
248 if (pg == null) {
249 pg = new Progress();
ed08c171
NR
250 }
251
08fe2e33 252 StringBuilder builder = new StringBuilder();
08fe2e33 253
cb554033
NR
254 String url = chapUrl.toString();
255 InputStream imageIn = null;
256 Element imageDoc = null;
257
258 // 1. find out how many images there are
259 int size;
260 try {
261 // note: when used, the base URL can be an ad-page
262 imageIn = openEx(url + "1.html");
263 imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
87a7a0a5
NR
264 } catch (IOException e) {
265 Instance.getTraceHandler().error(
266 new IOException("Cannot get image " + 1 + " of manga", e));
cb554033 267 } finally {
87a7a0a5
NR
268 if (imageIn != null) {
269 imageIn.close();
270 }
cb554033
NR
271 }
272 Element select = imageDoc.getElementsByClass("m").first();
273 Elements options = select.getElementsByTag("option");
274 size = options.size() - 1; // last is "Comments"
275
276 pg.setMinMax(0, size);
277
278 // 2. list them
279 for (int i = 1; i <= size; i++) {
87a7a0a5 280 if (i > 1) { // because first one was opened for size
cb554033
NR
281 try {
282 imageIn = openEx(url + i + ".html");
283 imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
284 + ".html");
87a7a0a5
NR
285
286 String linkImage = imageDoc.getElementById("image").absUrl(
287 "src");
288 if (linkImage != null) {
289 builder.append("[");
290 // to help with the retry and the originalUrl, part 1
291 builder.append(withoutQuery(linkImage));
292 builder.append("]<br/>");
293 }
294
295 // to help with the retry and the originalUrl, part 2
296 refresh(linkImage);
297 } catch (IOException e) {
298 Instance.getTraceHandler().error(
299 new IOException("Cannot get image " + i
300 + " of manga", e));
cb554033 301 } finally {
87a7a0a5
NR
302 if (imageIn != null) {
303 imageIn.close();
304 }
08fe2e33
NR
305 }
306 }
08fe2e33
NR
307 }
308
08fe2e33
NR
309 return builder.toString();
310 }
311
08fe2e33 312 /**
41c3bba7 313 * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
08fe2e33
NR
314 *
315 * @param url
316 * the URL to refresh
317 *
318 * @return TRUE if it was refreshed
319 */
320 private boolean refresh(String url) {
321 try {
322 openEx(url).close();
323 return true;
324 } catch (Exception e) {
325 return false;
326 }
327 }
328
329 /**
330 * Open the URL through the cache, but: retry a second time after 100ms if
331 * it fails, remove the query part of the {@link URL} before saving it to
332 * the cache (so it can be recalled later).
333 *
334 * @param url
335 * the {@link URL}
336 *
337 * @return the resource
338 *
339 * @throws IOException
340 * in case of I/O error
341 */
342 private InputStream openEx(String url) throws IOException {
343 try {
344 return Instance.getCache().open(new URL(url), this, true,
345 withoutQuery(url));
346 } catch (Exception e) {
347 // second chance
348 try {
349 Thread.sleep(100);
350 } catch (InterruptedException ee) {
351 }
352
353 return Instance.getCache().open(new URL(url), this, true,
354 withoutQuery(url));
355 }
356 }
357
358 /**
359 * Return the same input {@link URL} but without the query part.
360 *
361 * @param url
362 * the inpiut {@link URL} as a {@link String}
363 *
364 * @return the input {@link URL} without query
365 */
366 private URL withoutQuery(String url) {
367 URL o = null;
368 try {
369 // Remove the query from o (originalUrl), so it can be cached
370 // correctly
371 o = new URL(url);
372 o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
373
374 return o;
375 } catch (MalformedURLException e) {
376 return null;
377 }
378 }
cb554033
NR
379
380 /**
381 * Explode an HTML comma-separated list of values into a non-duplicate text
382 * {@link List} .
383 *
384 * @param values
385 * the comma-separated values in HTML format
386 *
387 * @return the full list with no duplicate in text format
388 */
389 private List<String> explode(String values) {
390 List<String> list = new ArrayList<String>();
391 if (values != null && !values.isEmpty()) {
392 for (String auth : values.split(",")) {
393 String a = StringUtils.unhtml(auth).trim();
394 if (!a.isEmpty() && !list.contains(a.trim())) {
395 list.add(a);
396 }
397 }
398 }
399
400 return list;
401 }
402
403 @Override
404 protected boolean supports(URL url) {
405 return "mangafox.me".equals(url.getHost())
406 || "www.mangafox.me".equals(url.getHost())
407 || "fanfox.net".equals(url.getHost())
408 || "www.fanfox.net".equals(url.getHost());
409 }
08fe2e33 410}