Fix tests:
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / MangaFox.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.AbstractMap;
8 import java.util.ArrayList;
9 import java.util.Collections;
10 import java.util.List;
11 import java.util.Map.Entry;
12
13 import org.jsoup.helper.DataUtil;
14 import org.jsoup.nodes.Element;
15 import org.jsoup.select.Elements;
16
17 import be.nikiroo.fanfix.Instance;
18 import be.nikiroo.fanfix.data.MetaData;
19 import be.nikiroo.utils.Image;
20 import be.nikiroo.utils.Progress;
21 import be.nikiroo.utils.StringUtils;
22
23 class MangaFox extends BasicSupport {
24 @Override
25 protected boolean isHtml() {
26 return true;
27 }
28
29 @Override
30 public String getSourceName() {
31 return "MangaFox.me";
32 }
33
34 @Override
35 protected MetaData getMeta() throws IOException {
36 MetaData meta = new MetaData();
37 Element doc = getSourceNode();
38
39 Element title = doc.getElementById("title");
40 Elements table = null;
41 if (title != null) {
42 table = title.getElementsByTag("table");
43 }
44 if (table != null) {
45 // Rows: header, data
46 Elements rows = table.first().getElementsByTag("tr");
47 if (rows.size() > 1) {
48 table = rows.get(1).getElementsByTag("td");
49 // Columns: Realeased, Authors, Artists, Genres
50 if (table.size() < 4) {
51 table = null;
52 }
53 }
54 }
55
56 meta.setTitle(getTitle());
57 if (table != null) {
58 meta.setAuthor(getAuthors(table.get(1).text() + ","
59 + table.get(2).text()));
60
61 meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
62 meta.setTags(explode(table.get(3).text()));
63 }
64 meta.setSource(getSourceName());
65 meta.setUrl(getSource().toString());
66 meta.setPublisher(getSourceName());
67 meta.setUuid(getSource().toString());
68 meta.setLuid("");
69 meta.setLang("en");
70 meta.setSubject("manga");
71 meta.setType(getType().toString());
72 meta.setImageDocument(true);
73 meta.setCover(getCover());
74
75 return meta;
76 }
77
78 private String getTitle() {
79 Element doc = getSourceNode();
80
81 Element title = doc.getElementById("title");
82 Element h1 = title.getElementsByTag("h1").first();
83 if (h1 != null) {
84 return StringUtils.unhtml(h1.text()).trim();
85 }
86
87 return null;
88 }
89
90 private String getAuthors(String authorList) {
91 String author = "";
92 for (String auth : explode(authorList)) {
93 if (!author.isEmpty()) {
94 author = author + ", ";
95 }
96 author += auth;
97 }
98
99 return author;
100 }
101
102 @Override
103 protected String getDesc() {
104 Element doc = getSourceNode();
105 Element title = doc.getElementsByClass("summary").first();
106 if (title != null) {
107 StringUtils.unhtml(title.text()).trim();
108 }
109
110 return null;
111 }
112
113 private Image getCover() {
114 Element doc = getSourceNode();
115 Element cover = doc.getElementsByClass("cover").first();
116 if (cover != null) {
117 cover = cover.getElementsByTag("img").first();
118 }
119
120 if (cover != null) {
121 String coverUrl = cover.absUrl("src");
122
123 InputStream coverIn;
124 try {
125 coverIn = openEx(coverUrl);
126 try {
127 return new Image(coverIn);
128 } finally {
129 coverIn.close();
130 }
131 } catch (IOException e) {
132 Instance.getTraceHandler().error(e);
133 }
134 }
135
136 return null;
137 }
138
139 @Override
140 protected List<Entry<String, URL>> getChapters(Progress pg) {
141 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
142
143 Element doc = getSourceNode();
144 for (Element li : doc.getElementsByTag("li")) {
145 Element el = li.getElementsByTag("h4").first();
146 if (el == null) {
147 el = li.getElementsByTag("h3").first();
148 }
149 if (el != null) {
150 Element a = el.getElementsByTag("a").first();
151 if (a != null) {
152 String title = StringUtils.unhtml(el.text()).trim();
153 try {
154 String url = a.absUrl("href");
155 if (url.endsWith("1.html")) {
156 url = url.substring(0,
157 url.length() - "1.html".length());
158 }
159 if (!url.endsWith("/")) {
160 url += "/";
161 }
162
163 urls.add(new AbstractMap.SimpleEntry<String, URL>(
164 title, new URL(url)));
165 } catch (Exception e) {
166 Instance.getTraceHandler().error(e);
167 }
168 }
169 }
170 }
171
172 // the chapters are in reversed order
173 Collections.reverse(urls);
174
175 return urls;
176 }
177
178 @Override
179 protected String getChapterContent(URL chapUrl, int number, Progress pg)
180 throws IOException {
181 if (pg == null) {
182 pg = new Progress();
183 }
184
185 StringBuilder builder = new StringBuilder();
186
187 String url = chapUrl.toString();
188 InputStream imageIn = null;
189 Element imageDoc = null;
190
191 // 1. find out how many images there are
192 int size;
193 try {
194 // note: when used, the base URL can be an ad-page
195 imageIn = openEx(url + "1.html");
196 imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
197 } finally {
198 imageIn.close();
199 }
200 Element select = imageDoc.getElementsByClass("m").first();
201 Elements options = select.getElementsByTag("option");
202 size = options.size() - 1; // last is "Comments"
203
204 pg.setMinMax(0, size);
205
206 // 2. list them
207 for (int i = 1; i <= size; i++) {
208 if (i > 1) { // because fist one was opened for size
209 try {
210 imageIn = openEx(url + i + ".html");
211 imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
212 + ".html");
213 } finally {
214 imageIn.close();
215 }
216 }
217
218 String linkImage = imageDoc.getElementById("image").absUrl("src");
219 if (linkImage != null) {
220 builder.append("[");
221 // to help with the retry and the originalUrl, part 1
222 builder.append(withoutQuery(linkImage));
223 builder.append("]<br/>");
224 }
225
226 // to help with the retry and the originalUrl, part 2
227 refresh(linkImage);
228 }
229
230 return builder.toString();
231 }
232
233 /**
234 * Refresh the {@link URL} by calling {@link MangaFoxNew#openEx(String)}.
235 *
236 * @param url
237 * the URL to refresh
238 *
239 * @return TRUE if it was refreshed
240 */
241 private boolean refresh(String url) {
242 try {
243 openEx(url).close();
244 return true;
245 } catch (Exception e) {
246 return false;
247 }
248 }
249
250 /**
251 * Open the URL through the cache, but: retry a second time after 100ms if
252 * it fails, remove the query part of the {@link URL} before saving it to
253 * the cache (so it can be recalled later).
254 *
255 * @param url
256 * the {@link URL}
257 *
258 * @return the resource
259 *
260 * @throws IOException
261 * in case of I/O error
262 */
263 private InputStream openEx(String url) throws IOException {
264 try {
265 return Instance.getCache().open(new URL(url), this, true,
266 withoutQuery(url));
267 } catch (Exception e) {
268 // second chance
269 try {
270 Thread.sleep(100);
271 } catch (InterruptedException ee) {
272 }
273
274 return Instance.getCache().open(new URL(url), this, true,
275 withoutQuery(url));
276 }
277 }
278
279 /**
280 * Return the same input {@link URL} but without the query part.
281 *
282 * @param url
283 * the inpiut {@link URL} as a {@link String}
284 *
285 * @return the input {@link URL} without query
286 */
287 private URL withoutQuery(String url) {
288 URL o = null;
289 try {
290 // Remove the query from o (originalUrl), so it can be cached
291 // correctly
292 o = new URL(url);
293 o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
294
295 return o;
296 } catch (MalformedURLException e) {
297 return null;
298 }
299 }
300
301 /**
302 * Explode an HTML comma-separated list of values into a non-duplicate text
303 * {@link List} .
304 *
305 * @param values
306 * the comma-separated values in HTML format
307 *
308 * @return the full list with no duplicate in text format
309 */
310 private List<String> explode(String values) {
311 List<String> list = new ArrayList<String>();
312 if (values != null && !values.isEmpty()) {
313 for (String auth : values.split(",")) {
314 String a = StringUtils.unhtml(auth).trim();
315 if (!a.isEmpty() && !list.contains(a.trim())) {
316 list.add(a);
317 }
318 }
319 }
320
321 return list;
322 }
323
324 @Override
325 protected boolean supports(URL url) {
326 return "mangafox.me".equals(url.getHost())
327 || "www.mangafox.me".equals(url.getHost())
328 || "fanfox.net".equals(url.getHost())
329 || "www.fanfox.net".equals(url.getHost());
330 }
331 }