mangafox: fix mangafox, but site is too full of javascript and obvious anti-copy...
[fanfix.git] / src / be / nikiroo / fanfix / supported / MangaFox.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.AbstractMap;
8 import java.util.ArrayList;
9 import java.util.Collections;
10 import java.util.List;
11 import java.util.Map.Entry;
12
13 import org.jsoup.helper.DataUtil;
14 import org.jsoup.nodes.Document;
15 import org.jsoup.nodes.Element;
16
17 import be.nikiroo.fanfix.Instance;
18 import be.nikiroo.fanfix.data.MetaData;
19 import be.nikiroo.utils.Image;
20 import be.nikiroo.utils.Progress;
21 import be.nikiroo.utils.StringUtils;
22
23 class MangaFox extends BasicSupport {
24 @Override
25 protected boolean isHtml() {
26 return true;
27 }
28
29 @Override
30 protected MetaData getMeta() throws IOException {
31 MetaData meta = new MetaData();
32
33 meta.setTitle(getTitle());
34 // No date anymore on mangafox
35 // meta.setDate();
36 meta.setAuthor(getAuthor());
37 meta.setTags(getTags());
38 meta.setSource(getType().getSourceName());
39 meta.setUrl(getSource().toString());
40 meta.setPublisher(getType().getSourceName());
41 meta.setUuid(getSource().toString());
42 meta.setLuid("");
43 meta.setLang("en");
44 meta.setSubject("manga");
45 meta.setType(getType().toString());
46 meta.setImageDocument(true);
47 meta.setCover(getCover());
48
49 return meta;
50 }
51
52 private String getTitle() {
53 Element doc = getSourceNode();
54
55 Element el = doc.getElementsByClass("detail-info-right-title-font").first();
56 if (el != null) {
57 return StringUtils.unhtml(el.text()).trim();
58 }
59
60 return null;
61 }
62
63 private String getAuthor() {
64 StringBuilder builder = new StringBuilder();
65 for (String author : getListA("detail-info-right-say")) {
66 if (builder.length() > 0)
67 builder.append(", ");
68 builder.append(author);
69 }
70
71 return builder.toString();
72 }
73
74 private List<String> getTags() {
75 return getListA("detail-info-right-tag-list");
76 }
77
78 private List<String> getListA(String uniqueClass) {
79 List<String> list = new ArrayList<String>();
80
81 Element doc = getSourceNode();
82 Element el = doc.getElementsByClass(uniqueClass).first();
83 if (el != null) {
84 for (Element valueA : el.getElementsByTag("a")) {
85 list.add(StringUtils.unhtml(valueA.text()).trim());
86 }
87 }
88
89 return list;
90 }
91
92 @Override
93 protected String getDesc() {
94 Element doc = getSourceNode();
95 Element title = doc.getElementsByClass("fullcontent").first();
96 if (title != null) {
97 return StringUtils.unhtml(title.text()).trim();
98 }
99
100 return null;
101 }
102
103 private Image getCover() {
104 Element doc = getSourceNode();
105 Element cover = doc.getElementsByClass("detail-info-cover-img").first();
106 if (cover != null) {
107 String coverUrl = cover.absUrl("src");
108
109 InputStream coverIn;
110 try {
111 coverIn = openEx(coverUrl);
112 try {
113 return new Image(coverIn);
114 } finally {
115 coverIn.close();
116 }
117 } catch (IOException e) {
118 Instance.getTraceHandler().error(e);
119 }
120 }
121
122 return null;
123 }
124
125 @Override
126 protected List<Entry<String, URL>> getChapters(Progress pg) {
127 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
128
129 String prefix = getTitle(); // each chapter starts with this prefix, then a
130 // chapter number (including "x.5"), then name
131
132 // normally, only one list...
133 Element doc = getSourceNode();
134 for (Element list : doc.getElementsByClass("detail-main-list")) {
135 for (Element el : list.getElementsByTag("a")) {
136 String title = el.attr("title");
137 if (title.startsWith(prefix)) {
138 title = title.substring(prefix.length()).trim();
139 }
140
141 String url = el.absUrl("href");
142
143 try {
144 urls.add(new AbstractMap.SimpleEntry<String, URL>(title, new URL(url)));
145 } catch (Exception e) {
146 Instance.getTraceHandler().error(e);
147 }
148 }
149 }
150
151 // by default, the chapters are in reversed order
152 Collections.reverse(urls);
153
154 return urls;
155 }
156
157 @Override
158 protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
159 if (pg == null) {
160 pg = new Progress();
161 }
162
163 StringBuilder builder = new StringBuilder();
164
165 Document chapDoc = DataUtil.load(Instance.getCache().open(chapUrl, this, false), "UTF-8", chapUrl.toString());
166
167 // Example of what we want:
168 // URL: http://fanfox.net/manga/solo_leveling/c110.5/1.html#ipg1
169 // IMAGE, not working:
170 // http://s.fanfox.net/store/manga/29037/110.5/compressed/s034.jpg?token=f630767b0c96f6cc793fc8f1fc177c0ae9342eb1&amp;ttl=1585929600
171 // IMAGE, working:
172 // http://s.fanfox.net/store/manga/29037/000.0/compressed/m2018110o_143554_925.jpg?token=7d74569986335d49651ef1040f7dcb9dbd559b1b&ttl=1585929600
173 // NOTE: (c110.5 -> 110.5, c000 -> 000.0)
174 // NOTE: image key: m2018110o_143554_925 can be found in the script, but not
175 // sorted
176
177 // 0. Get the javascript content
178 StringBuilder javascript = new StringBuilder();
179 for (Element script : chapDoc.getElementsByTag("script")) {
180 javascript.append(script.html());
181 javascript.append("\n");
182 }
183
184 // 1. Get the chapter url part
185 String chap = chapUrl.getPath();
186 chap = chap.split("#")[0];
187 if (chap.endsWith("/1.html")) {
188 chap = chap.substring(0, chap.length() - "/1.html".length());
189 }
190 int pos = chap.lastIndexOf("/");
191 chap = chap.substring(pos + 1);
192 if (!chap.contains(".")) {
193 chap = chap + ".0";
194 }
195 if (chap.startsWith("c")) {
196 chap = chap.substring(1);
197 }
198
199 // 2. Token:
200 // <meta name="og:image"
201 // content="http://fmcdn.fanfox.net/store/manga/29037/cover.jpg?token=4b2056d83973716c715f2404940822dff942a7b4&ttl=1585998000&v=1584582495"
202 Element el = chapDoc.select("meta[name=\"og:image\"]").first();
203 String token = el.attr("content").split("\\?")[1];
204
205 // 3. Comic ID
206 int comicId = getIntVar(javascript, "comicid");
207
208 // 4. Get images
209 List<String> chapKeys = getImageKeys(javascript);
210 // http://s.fanfox.net/store/manga/29037/000.0/compressed/m2018110o_143554_925.jpg?token=7d74569986335d49651ef1040f7dcb9dbd559b1b&ttl=1585929600
211 String base = "http://s.fanfox.net/store/manga/%s/%s/compressed/%s.jpg?%s";
212 for (String key : chapKeys) {
213 String img = String.format(base, comicId, chap, key, token);
214 builder.append("[");
215 builder.append(img);
216 builder.append("]<br/>");
217 }
218
219 return builder.toString();
220 }
221
222 private int getIntVar(StringBuilder builder, String var) {
223 var = "var " + var;
224
225 int pos = builder.indexOf(var) + var.length();
226 String value = builder.subSequence(pos, pos + 20).toString();
227 value = value.split("=")[1].trim();
228 value = value.split(";")[0].trim();
229
230 return Integer.parseInt(value);
231 }
232
233 private List<String> getImageKeys(StringBuilder builder) {
234 List<String> chapKeys = new ArrayList<String>();
235
236 String start = "|compressed|";
237 String stop = ">";
238 int pos = builder.indexOf(start) + start.length();
239 int pos2 = builder.indexOf(stop, pos) - stop.length();
240
241 String data = builder.substring(pos, pos2);
242 data = data.replace("|", "'");
243 for (String key : data.split("'")) {
244 if (key.startsWith("m") && !key.equals("manga")) {
245 chapKeys.add(key);
246 }
247 }
248
249 Collections.sort(chapKeys);
250 return chapKeys;
251 }
252
253 /**
254 * Open the URL through the cache, but: retry a second time after 100ms if it
255 * fails, remove the query part of the {@link URL} before saving it to the cache
256 * (so it can be recalled later).
257 *
258 * @param url the {@link URL}
259 *
260 * @return the resource
261 *
262 * @throws IOException in case of I/O error
263 */
264 private InputStream openEx(String url) throws IOException {
265 try {
266 return Instance.getCache().open(new URL(url), withoutQuery(url), this, true);
267 } catch (Exception e) {
268 // second chance
269 try {
270 Thread.sleep(100);
271 } catch (InterruptedException ee) {
272 }
273
274 return Instance.getCache().open(new URL(url), withoutQuery(url), this, true);
275 }
276 }
277
278 /**
279 * Return the same input {@link URL} but without the query part.
280 *
281 * @param url the inpiut {@link URL} as a {@link String}
282 *
283 * @return the input {@link URL} without query
284 */
285 private URL withoutQuery(String url) {
286 URL o = null;
287 try {
288 // Remove the query from o (originalUrl), so it can be cached
289 // correctly
290 o = new URL(url);
291 o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
292
293 return o;
294 } catch (MalformedURLException e) {
295 return null;
296 }
297 }
298
299 @Override
300 protected boolean supports(URL url) {
301 return "mangafox.me".equals(url.getHost()) || "www.mangafox.me".equals(url.getHost())
302 || "fanfox.net".equals(url.getHost()) || "www.fanfox.net".equals(url.getHost());
303 }
304 }