test: improve flag files
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / MangaFox.java
... / ...
CommitLineData
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.MalformedURLException;
6import java.net.URL;
7import java.util.AbstractMap;
8import java.util.ArrayList;
9import java.util.Collections;
10import java.util.List;
11import java.util.Map.Entry;
12import java.util.SortedMap;
13import java.util.TreeMap;
14
15import org.jsoup.helper.DataUtil;
16import org.jsoup.nodes.Element;
17import org.jsoup.select.Elements;
18
19import be.nikiroo.fanfix.Instance;
20import be.nikiroo.fanfix.data.MetaData;
21import be.nikiroo.utils.Image;
22import be.nikiroo.utils.Progress;
23import be.nikiroo.utils.StringUtils;
24
25class MangaFox extends BasicSupport {
26 @Override
27 protected boolean isHtml() {
28 return true;
29 }
30
31 @Override
32 protected MetaData getMeta() throws IOException {
33 MetaData meta = new MetaData();
34 Element doc = getSourceNode();
35
36 Element title = doc.getElementById("title");
37 Elements table = null;
38 if (title != null) {
39 table = title.getElementsByTag("table");
40 }
41 if (table != null) {
42 // Rows: header, data
43 Elements rows = table.first().getElementsByTag("tr");
44 if (rows.size() > 1) {
45 table = rows.get(1).getElementsByTag("td");
46 // Columns: Realeased, Authors, Artists, Genres
47 if (table.size() < 4) {
48 table = null;
49 }
50 }
51 }
52
53 meta.setTitle(getTitle());
54 if (table != null) {
55 meta.setAuthor(getAuthors(table.get(1).text() + ","
56 + table.get(2).text()));
57
58 meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
59 meta.setTags(explode(table.get(3).text()));
60 }
61 meta.setSource(getType().getSourceName());
62 meta.setUrl(getSource().toString());
63 meta.setPublisher(getType().getSourceName());
64 meta.setUuid(getSource().toString());
65 meta.setLuid("");
66 meta.setLang("en");
67 meta.setSubject("manga");
68 meta.setType(getType().toString());
69 meta.setImageDocument(true);
70 meta.setCover(getCover());
71
72 return meta;
73 }
74
75 private String getTitle() {
76 Element doc = getSourceNode();
77
78 Element title = doc.getElementById("title");
79 Element h1 = title.getElementsByTag("h1").first();
80 if (h1 != null) {
81 return StringUtils.unhtml(h1.text()).trim();
82 }
83
84 return null;
85 }
86
87 private String getAuthors(String authorList) {
88 String author = "";
89 for (String auth : explode(authorList)) {
90 if (!author.isEmpty()) {
91 author = author + ", ";
92 }
93 author += auth;
94 }
95
96 return author;
97 }
98
99 @Override
100 protected String getDesc() {
101 Element doc = getSourceNode();
102 Element title = doc.getElementsByClass("summary").first();
103 if (title != null) {
104 return StringUtils.unhtml(title.text()).trim();
105 }
106
107 return null;
108 }
109
110 private Image getCover() {
111 Element doc = getSourceNode();
112 Element cover = doc.getElementsByClass("cover").first();
113 if (cover != null) {
114 cover = cover.getElementsByTag("img").first();
115 }
116
117 if (cover != null) {
118 String coverUrl = cover.absUrl("src");
119
120 InputStream coverIn;
121 try {
122 coverIn = openEx(coverUrl);
123 try {
124 return new Image(coverIn);
125 } finally {
126 coverIn.close();
127 }
128 } catch (IOException e) {
129 Instance.getTraceHandler().error(e);
130 }
131 }
132
133 return null;
134 }
135
136 @Override
137 protected List<Entry<String, URL>> getChapters(Progress pg) {
138 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
139
140 String prefix = null; // each chapter starts with this prefix, then a
141 // chapter number (including "x.5"), then name
142
143 Element doc = getSourceNode();
144 for (Element li : doc.getElementsByTag("li")) {
145 Element el = li.getElementsByTag("h4").first();
146 if (el == null) {
147 el = li.getElementsByTag("h3").first();
148 }
149 if (el != null) {
150 Element a = el.getElementsByTag("a").first();
151 if (a != null) {
152 String title = StringUtils.unhtml(el.text()).trim();
153 try {
154 String url = a.absUrl("href");
155 if (url.endsWith("1.html")) {
156 url = url.substring(0,
157 url.length() - "1.html".length());
158 }
159 if (!url.endsWith("/")) {
160 url += "/";
161 }
162
163 if (prefix == null || !prefix.isEmpty()) {
164 StringBuilder possiblePrefix = new StringBuilder(
165 StringUtils.unhtml(a.text()).trim());
166 while (possiblePrefix.length() > 0) {
167 char car = possiblePrefix.charAt(possiblePrefix
168 .length() - 1);
169 boolean punctuation = (car == '.' || car == ' ');
170 boolean digit = (car >= '0' && car <= '9');
171 if (!punctuation && !digit) {
172 break;
173 }
174
175 possiblePrefix.setLength(possiblePrefix
176 .length() - 1);
177 }
178
179 if (prefix == null) {
180 prefix = possiblePrefix.toString();
181 }
182
183 if (!prefix.equalsIgnoreCase(possiblePrefix
184 .toString())) {
185 prefix = ""; // prefix not ok
186 }
187 }
188
189 urls.add(new AbstractMap.SimpleEntry<String, URL>(
190 title, new URL(url)));
191 } catch (Exception e) {
192 Instance.getTraceHandler().error(e);
193 }
194 }
195 }
196 }
197
198 if (prefix != null && !prefix.isEmpty()) {
199 try {
200 // We found a prefix, so everything should be sortable
201 SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
202 for (Entry<String, URL> entry : urls) {
203 String num = entry.getKey().substring(prefix.length() + 1)
204 .trim();
205 String name = "";
206 int pos = num.indexOf(' ');
207 if (pos >= 0) {
208 name = num.substring(pos).trim();
209 num = num.substring(0, pos).trim();
210 }
211
212 if (!name.isEmpty()) {
213 name = "Tome " + num + ": " + name;
214 } else {
215 name = "Tome " + num;
216 }
217
218 double key = Double.parseDouble(num);
219
220 map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
221 entry.getValue()));
222 }
223 urls = new ArrayList<Entry<String, URL>>(map.values());
224 } catch (NumberFormatException e) {
225 Instance.getTraceHandler()
226 .error(new IOException(
227 "Cannot find a tome number, revert to default sorting",
228 e));
229 // by default, the chapters are in reversed order
230 Collections.reverse(urls);
231 }
232 } else {
233 // by default, the chapters are in reversed order
234 Collections.reverse(urls);
235 }
236
237 return urls;
238 }
239
240 @Override
241 protected String getChapterContent(URL chapUrl, int number, Progress pg)
242 throws IOException {
243 if (pg == null) {
244 pg = new Progress();
245 }
246
247 StringBuilder builder = new StringBuilder();
248
249 String url = chapUrl.toString();
250 InputStream imageIn = null;
251 Element imageDoc = null;
252
253 // 1. find out how many images there are
254 int size;
255 try {
256 // note: when used, the base URL can be an ad-page
257 imageIn = openEx(url + "1.html");
258 imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
259 } catch (IOException e) {
260 Instance.getTraceHandler().error(
261 new IOException("Cannot get image " + 1 + " of manga", e));
262 } finally {
263 if (imageIn != null) {
264 imageIn.close();
265 }
266 }
267 Element select = imageDoc.getElementsByClass("m").first();
268 Elements options = select.getElementsByTag("option");
269 size = options.size() - 1; // last is "Comments"
270
271 pg.setMinMax(0, size);
272
273 // 2. list them
274 for (int i = 1; i <= size; i++) {
275 if (i > 1) { // because first one was opened for size
276 try {
277 imageIn = openEx(url + i + ".html");
278 imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
279 + ".html");
280
281 String linkImage = imageDoc.getElementById("image").absUrl(
282 "src");
283 if (linkImage != null) {
284 builder.append("[");
285 // to help with the retry and the originalUrl, part 1
286 builder.append(withoutQuery(linkImage));
287 builder.append("]<br/>");
288 }
289
290 // to help with the retry and the originalUrl, part 2
291 refresh(linkImage);
292 } catch (IOException e) {
293 Instance.getTraceHandler().error(
294 new IOException("Cannot get image " + i
295 + " of manga", e));
296 } finally {
297 if (imageIn != null) {
298 imageIn.close();
299 }
300 }
301 }
302 }
303
304 return builder.toString();
305 }
306
307 /**
308 * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
309 *
310 * @param url
311 * the URL to refresh
312 *
313 * @return TRUE if it was refreshed
314 */
315 private boolean refresh(String url) {
316 try {
317 openEx(url).close();
318 return true;
319 } catch (Exception e) {
320 return false;
321 }
322 }
323
324 /**
325 * Open the URL through the cache, but: retry a second time after 100ms if
326 * it fails, remove the query part of the {@link URL} before saving it to
327 * the cache (so it can be recalled later).
328 *
329 * @param url
330 * the {@link URL}
331 *
332 * @return the resource
333 *
334 * @throws IOException
335 * in case of I/O error
336 */
337 private InputStream openEx(String url) throws IOException {
338 try {
339 return Instance.getCache().open(new URL(url), withoutQuery(url),
340 this, true);
341 } catch (Exception e) {
342 // second chance
343 try {
344 Thread.sleep(100);
345 } catch (InterruptedException ee) {
346 }
347
348 return Instance.getCache().open(new URL(url), withoutQuery(url),
349 this, true);
350 }
351 }
352
353 /**
354 * Return the same input {@link URL} but without the query part.
355 *
356 * @param url
357 * the inpiut {@link URL} as a {@link String}
358 *
359 * @return the input {@link URL} without query
360 */
361 private URL withoutQuery(String url) {
362 URL o = null;
363 try {
364 // Remove the query from o (originalUrl), so it can be cached
365 // correctly
366 o = new URL(url);
367 o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
368
369 return o;
370 } catch (MalformedURLException e) {
371 return null;
372 }
373 }
374
375 /**
376 * Explode an HTML comma-separated list of values into a non-duplicate text
377 * {@link List} .
378 *
379 * @param values
380 * the comma-separated values in HTML format
381 *
382 * @return the full list with no duplicate in text format
383 */
384 private List<String> explode(String values) {
385 List<String> list = new ArrayList<String>();
386 if (values != null && !values.isEmpty()) {
387 for (String auth : values.split(",")) {
388 String a = StringUtils.unhtml(auth).trim();
389 if (!a.isEmpty() && !list.contains(a.trim())) {
390 list.add(a);
391 }
392 }
393 }
394
395 return list;
396 }
397
398 @Override
399 protected boolean supports(URL url) {
400 return "mangafox.me".equals(url.getHost())
401 || "www.mangafox.me".equals(url.getHost())
402 || "fanfox.net".equals(url.getHost())
403 || "www.fanfox.net".equals(url.getHost());
404 }
405}