Support for no-chapter stories or stories with description before Chapter
[nikiroo-utils.git] / supported / E621.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
b5e9855b 4import java.io.UnsupportedEncodingException;
c4b18c94 5import java.net.MalformedURLException;
08fe2e33 6import java.net.URL;
b5e9855b 7import java.net.URLDecoder;
ce297a79 8import java.util.AbstractMap;
08fe2e33 9import java.util.ArrayList;
9b863b20 10import java.util.Collections;
8ac3d099 11import java.util.Date;
b5e9855b 12import java.util.LinkedList;
08fe2e33
NR
13import java.util.List;
14import java.util.Map.Entry;
8ac3d099 15
5cf61f35
NR
16import org.json.JSONArray;
17import org.json.JSONException;
18import org.json.JSONObject;
8ac3d099
NR
19import org.jsoup.helper.DataUtil;
20import org.jsoup.nodes.Document;
21import org.jsoup.nodes.Element;
08fe2e33
NR
22
23import be.nikiroo.fanfix.Instance;
5cf61f35 24import be.nikiroo.fanfix.bundles.Config;
68686a37 25import be.nikiroo.fanfix.data.MetaData;
16a81ef7 26import be.nikiroo.utils.Image;
3b2b638f 27import be.nikiroo.utils.Progress;
08fe2e33 28import be.nikiroo.utils.StringUtils;
5cf61f35 29import be.nikiroo.utils.Version;
08fe2e33
NR
30
31/**
8ac3d099
NR
32 * Support class for <a href="http://e621.net/">e621.net</a> and
33 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
08fe2e33
NR
34 * including some of MLP.
35 * <p>
36 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
37 * comics, but it can be difficult to browse.
38 *
39 * @author niki
40 */
8ac3d099 41class E621 extends BasicSupport {
08fe2e33
NR
42 @Override
43 protected boolean supports(URL url) {
44 String host = url.getHost();
45 if (host.startsWith("www.")) {
46 host = host.substring("www.".length());
47 }
48
5cf61f35
NR
49 return ("e621.net".equals(host) || "e926.net".equals(host))
50 && (isPool(url) || isSearchOrSet(url));
08fe2e33
NR
51 }
52
53 @Override
54 protected boolean isHtml() {
55 return true;
56 }
57
8ac3d099
NR
58 @Override
59 protected MetaData getMeta() throws IOException {
60 MetaData meta = new MetaData();
b5e9855b 61
8ac3d099
NR
62 meta.setTitle(getTitle());
63 meta.setAuthor(getAuthor());
bff19b54 64 meta.setDate(bsHelper.formatDate(getDate()));
8ac3d099
NR
65 meta.setTags(getTags());
66 meta.setSource(getType().getSourceName());
67 meta.setUrl(getSource().toString());
68 meta.setPublisher(getType().getSourceName());
69 meta.setUuid(getSource().toString());
70 meta.setLuid("");
71 meta.setLang("en");
72 meta.setSubject("Furry");
73 meta.setType(getType().toString());
74 meta.setImageDocument(true);
75 meta.setCover(getCover());
76 meta.setFakeCover(true);
595dfa7a 77
8ac3d099 78 return meta;
595dfa7a
NR
79 }
80
8ac3d099
NR
81 @Override
82 protected String getDesc() throws IOException {
83 if (isSearchOrSet(getSource())) {
b5e9855b 84 StringBuilder builder = new StringBuilder();
5cf61f35
NR
85 builder.append("A collection of images from ")
86 .append(getSource().getHost()).append("\n") //
87 .append("\tTime of creation: "
88 + StringUtils.fromTime(new Date().getTime()))
89 .append("\n") //
8ac3d099
NR
90 .append("\tTags: ");//
91 for (String tag : getTags()) {
92 builder.append("\t\t").append(tag);
b5e9855b
NR
93 }
94
95 return builder.toString();
96 }
97
8ac3d099
NR
98 if (isPool(getSource())) {
99 Element el = getSourceNode().getElementById("description");
100 if (el != null) {
101 return el.text();
08fe2e33
NR
102 }
103 }
104
105 return null;
106 }
107
08fe2e33 108 @Override
5cf61f35 109 protected List<Entry<String, URL>> getChapters(Progress pg)
8ac3d099 110 throws IOException {
5cf61f35
NR
111 int i = 1;
112 String jsonUrl = getJsonUrl();
113 if (jsonUrl != null) {
114 for (i = 1; true; i++) {
115 if (i > 1) {
116 try {
117 // The API does not accept more than 2 request per sec,
118 // and asks us to limit at one per sec when possible
119 Thread.sleep(1000);
120 } catch (InterruptedException e) {
121 }
122 }
b5e9855b 123
b5e9855b 124 try {
5cf61f35
NR
125 JSONObject json = getJson(jsonUrl + "&page=" + i, false);
126 if (!json.has("posts"))
b5e9855b 127 break;
5cf61f35
NR
128 JSONArray posts = json.getJSONArray("posts");
129 if (posts.isEmpty())
130 break;
131 } catch (Exception e) {
132 e.printStackTrace();
b5e9855b 133 }
b5e9855b 134 }
5cf61f35
NR
135
136 // The last page was empty:
137 i--;
138 }
139
140 // The pages and images are in reverse order on /posts/
141 List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>();
142 for (int page = i; page > 0; page--) {
143 chapters.add(new AbstractMap.SimpleEntry<String, URL>(
144 "Page " + Integer.toString(i - page + 1),
145 new URL(jsonUrl + "&page=" + page)));
b5e9855b
NR
146 }
147
5cf61f35 148 return chapters;
b5e9855b
NR
149 }
150
8ac3d099 151 @Override
5cf61f35
NR
152 protected String getChapterContent(URL chapUrl, int number, Progress pg)
153 throws IOException {
8ac3d099 154 StringBuilder builder = new StringBuilder();
5cf61f35
NR
155
156 JSONObject json = getJson(chapUrl, false);
157 JSONArray postsArr = json.getJSONArray("posts");
158
159 // The pages and images are in reverse order on /posts/
160 List<JSONObject> posts = new ArrayList<JSONObject>(postsArr.length());
161 for (int i = postsArr.length() - 1; i >= 0; i--) {
162 Object o = postsArr.get(i);
163 if (o instanceof JSONObject)
164 posts.add((JSONObject) o);
75002fcc 165 }
5cf61f35
NR
166
167 for (JSONObject post : posts) {
168 if (!post.has("file"))
169 continue;
170 JSONObject file = post.getJSONObject("file");
171 if (!file.has("url"))
172 continue;
173
174 try {
175 String url = file.getString("url");
176 builder.append("[");
177 builder.append(url);
178 builder.append("]<br/>");
179 } catch (JSONException e) {
180 // Can be NULL if filtered
181 // When the value is NULL, we get an exception
182 // but the "has" method still returns true
a3d0728c
NR
183 Instance.getInstance().getTraceHandler()
184 .error("Cannot get image for chapter " + number + " of "
185 + getSource());
5cf61f35 186 }
8ac3d099
NR
187 }
188
189 return builder.toString();
190 }
191
192 @Override
193 protected URL getCanonicalUrl(URL source) {
8fbfa934
NR
194 // Convert search-pools into proper pools
195 if (source.getPath().equals("/posts") && source.getQuery() != null
196 && source.getQuery().startsWith("tags=pool%3A")) {
197 String poolNumber = source.getQuery()
198 .substring("tags=pool%3A".length());
199 try {
200 Integer.parseInt(poolNumber);
201 String base = source.getProtocol() + "://" + source.getHost();
202 if (source.getPort() != -1) {
203 base = base + ":" + source.getPort();
204 }
42cdf6f0 205 source = new URL(base + "/pools/" + poolNumber);
8fbfa934 206 } catch (NumberFormatException e) {
36c35b92 207 // Not a simple pool, skip
8fbfa934
NR
208 } catch (MalformedURLException e) {
209 // Cannot happen
210 }
211 }
5cf61f35 212
8ac3d099
NR
213 if (isSetOriginalUrl(source)) {
214 try {
5cf61f35
NR
215 Document doc = DataUtil.load(Instance.getInstance().getCache()
216 .open(source, this, false), "UTF-8", source.toString());
217 for (Element shortname : doc
218 .getElementsByClass("set-shortname")) {
8ac3d099
NR
219 for (Element el : shortname.getElementsByTag("a")) {
220 if (!el.attr("href").isEmpty())
221 return new URL(el.absUrl("href"));
08fe2e33
NR
222 }
223 }
8ac3d099 224 } catch (IOException e) {
d66deb8d 225 Instance.getInstance().getTraceHandler().error(e);
08fe2e33
NR
226 }
227 }
228
c4b18c94
NR
229 if (isPool(source)) {
230 try {
5cf61f35
NR
231 return new URL(
232 source.toString().replace("/pool/show/", "/pools/"));
c4b18c94
NR
233 } catch (MalformedURLException e) {
234 }
235 }
236
8ac3d099
NR
237 return super.getCanonicalUrl(source);
238 }
239
8ac3d099
NR
240 private String getTitle() {
241 String title = "";
242
243 Element el = getSourceNode().getElementsByTag("title").first();
244 if (el != null) {
245 title = el.text().trim();
08fe2e33
NR
246 }
247
36c35b92 248 for (String s : new String[] { "e621", "-", "e621", "Pool", "-" }) {
8ac3d099
NR
249 if (title.startsWith(s)) {
250 title = title.substring(s.length()).trim();
08fe2e33 251 }
8ac3d099
NR
252 if (title.endsWith(s)) {
253 title = title.substring(0, title.length() - s.length()).trim();
254 }
08fe2e33
NR
255 }
256
8ac3d099
NR
257 if (isSearchOrSet(getSource())) {
258 title = title.isEmpty() ? "e621" : "[e621] " + title;
259 }
5cf61f35 260
8ac3d099 261 return title;
08fe2e33 262 }
b5e9855b 263
5cf61f35
NR
264 private String getAuthor() {
265 List<String> list = new ArrayList<String>();
266 String jsonUrl = getJsonUrl();
267 if (jsonUrl != null) {
268 try {
269 JSONObject json = getJson(jsonUrl, false);
270 JSONArray posts = json.getJSONArray("posts");
271 for (Object obj : posts) {
272 if (!(obj instanceof JSONObject))
273 continue;
274
275 JSONObject post = (JSONObject) obj;
276 if (!post.has("tags"))
277 continue;
278
279 JSONObject tags = post.getJSONObject("tags");
280 if (!tags.has("artist"))
281 continue;
282
283 JSONArray artists = tags.getJSONArray("artist");
284 for (Object artist : artists) {
285 if (list.contains(artist.toString()))
286 continue;
287
288 list.add(artist.toString());
8ac3d099 289 }
9948521d 290 }
5cf61f35
NR
291 } catch (Exception e) {
292 e.printStackTrace();
8ac3d099
NR
293 }
294 }
295
5cf61f35
NR
296 StringBuilder builder = new StringBuilder();
297 for (String artist : list) {
298 if (builder.length() > 0) {
299 builder.append(", ");
300 }
301 builder.append(artist);
302 }
8ac3d099 303
5cf61f35
NR
304 return builder.toString();
305 }
8ac3d099 306
5cf61f35
NR
307 private String getDate() {
308 String jsonUrl = getJsonUrl();
309 if (jsonUrl != null) {
310 try {
311 JSONObject json = getJson(jsonUrl, false);
312 JSONArray posts = json.getJSONArray("posts");
313 for (Object obj : posts) {
314 if (!(obj instanceof JSONObject))
315 continue;
8d1a4fd2 316
5cf61f35
NR
317 JSONObject post = (JSONObject) obj;
318 if (!post.has("created_at"))
319 continue;
320
321 return post.getString("created_at");
8d1a4fd2 322 }
5cf61f35
NR
323 } catch (Exception e) {
324 e.printStackTrace();
8d1a4fd2 325 }
8ac3d099 326 }
9948521d 327
5cf61f35 328 return "";
8ac3d099
NR
329 }
330
331 // no tags for pools
332 private List<String> getTags() {
333 List<String> tags = new ArrayList<String>();
334 if (isSearchOrSet(getSource())) {
335 String str = getTagsFromUrl(getSource());
336 for (String tag : str.split("\\+")) {
9b863b20 337 try {
8ac3d099
NR
338 tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
339 } catch (UnsupportedEncodingException e) {
9b863b20
NR
340 }
341 }
342 }
9948521d 343
8ac3d099
NR
344 return tags;
345 }
346
5cf61f35
NR
347 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
348 private String getTagsFromUrl(URL url) {
349 String tags = url == null ? "" : url.getQuery();
350 int pos = tags.indexOf("tags=");
351
352 if (pos >= 0) {
353 tags = tags.substring(pos).substring("tags=".length());
354 } else {
355 return "";
356 }
357
358 pos = tags.indexOf('&');
359 if (pos > 0) {
360 tags = tags.substring(0, pos);
361 }
362 pos = tags.indexOf('/');
363 if (pos > 0) {
364 tags = tags.substring(0, pos);
365 }
366
367 return tags;
368 }
369
8ac3d099
NR
370 private Image getCover() throws IOException {
371 Image image = null;
372 List<Entry<String, URL>> chapters = getChapters(null);
373 if (!chapters.isEmpty()) {
12c180fc
NR
374 URL chap1Url = chapters.get(0).getValue();
375 String imgsChap1 = getChapterContent(chap1Url, 1, null);
376 if (!imgsChap1.isEmpty()) {
377 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
378 image = bsImages.getImage(this, new URL(imgsChap1));
379 }
8ac3d099
NR
380 }
381
382 return image;
383 }
384
5cf61f35
NR
385 // always /posts.json/ url
386 private String getJsonUrl() {
387 String url = null;
388 if (isSearchOrSet(getSource())) {
389 url = getSource().toString().replace("/posts", "/posts.json");
390 }
391
392 if (isPool(getSource())) {
393 String poolNumber = getSource().getPath()
394 .substring("/pools/".length());
395 url = "https://e621.net/posts.json" + "?tags=pool%3A" + poolNumber;
396 }
397
398 if (url != null) {
399 // Note: one way to override the blacklist
400 String login = Instance.getInstance().getConfig()
401 .getString(Config.LOGIN_E621_LOGIN);
402 String apk = Instance.getInstance().getConfig()
403 .getString(Config.LOGIN_E621_APIKEY);
404
405 if (login != null && !login.isEmpty() && apk != null
406 && !apk.isEmpty()) {
407 url = String.format("%s&login=%s&api_key=%s&_client=%s", url,
408 login, apk, "fanfix-" + Version.getCurrentVersion());
409 }
410 }
411
412 return url;
413 }
414
8ac3d099
NR
415 // note: will be removed at getCanonicalUrl()
416 private boolean isSetOriginalUrl(URL originalUrl) {
417 return originalUrl.getPath().startsWith("/post_sets/");
9b863b20
NR
418 }
419
b5e9855b 420 private boolean isPool(URL url) {
5cf61f35
NR
421 return url.getPath().startsWith("/pools/")
422 || url.getPath().startsWith("/pool/show/");
b5e9855b
NR
423 }
424
8ac3d099
NR
425 // set will be renamed into search by canonical url
426 private boolean isSearchOrSet(URL url) {
427 return
428 // search:
429 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
430 // or set:
431 || isSetOriginalUrl(url);
b5e9855b 432 }
08fe2e33 433}