Support for no-chapter stories or stories with description before Chapter
[nikiroo-utils.git] / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.UnsupportedEncodingException;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.net.URLDecoder;
8 import java.util.AbstractMap;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.Date;
12 import java.util.LinkedList;
13 import java.util.List;
14 import java.util.Map.Entry;
15
16 import org.json.JSONArray;
17 import org.json.JSONException;
18 import org.json.JSONObject;
19 import org.jsoup.helper.DataUtil;
20 import org.jsoup.nodes.Document;
21 import org.jsoup.nodes.Element;
22
23 import be.nikiroo.fanfix.Instance;
24 import be.nikiroo.fanfix.bundles.Config;
25 import be.nikiroo.fanfix.data.MetaData;
26 import be.nikiroo.utils.Image;
27 import be.nikiroo.utils.Progress;
28 import be.nikiroo.utils.StringUtils;
29 import be.nikiroo.utils.Version;
30
31 /**
32 * Support class for <a href="http://e621.net/">e621.net</a> and
33 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
34 * including some of MLP.
35 * <p>
36 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
37 * comics, but it can be difficult to browse.
38 *
39 * @author niki
40 */
41 class E621 extends BasicSupport {
42 @Override
43 protected boolean supports(URL url) {
44 String host = url.getHost();
45 if (host.startsWith("www.")) {
46 host = host.substring("www.".length());
47 }
48
49 return ("e621.net".equals(host) || "e926.net".equals(host))
50 && (isPool(url) || isSearchOrSet(url));
51 }
52
53 @Override
54 protected boolean isHtml() {
55 return true;
56 }
57
58 @Override
59 protected MetaData getMeta() throws IOException {
60 MetaData meta = new MetaData();
61
62 meta.setTitle(getTitle());
63 meta.setAuthor(getAuthor());
64 meta.setDate(bsHelper.formatDate(getDate()));
65 meta.setTags(getTags());
66 meta.setSource(getType().getSourceName());
67 meta.setUrl(getSource().toString());
68 meta.setPublisher(getType().getSourceName());
69 meta.setUuid(getSource().toString());
70 meta.setLuid("");
71 meta.setLang("en");
72 meta.setSubject("Furry");
73 meta.setType(getType().toString());
74 meta.setImageDocument(true);
75 meta.setCover(getCover());
76 meta.setFakeCover(true);
77
78 return meta;
79 }
80
81 @Override
82 protected String getDesc() throws IOException {
83 if (isSearchOrSet(getSource())) {
84 StringBuilder builder = new StringBuilder();
85 builder.append("A collection of images from ")
86 .append(getSource().getHost()).append("\n") //
87 .append("\tTime of creation: "
88 + StringUtils.fromTime(new Date().getTime()))
89 .append("\n") //
90 .append("\tTags: ");//
91 for (String tag : getTags()) {
92 builder.append("\t\t").append(tag);
93 }
94
95 return builder.toString();
96 }
97
98 if (isPool(getSource())) {
99 Element el = getSourceNode().getElementById("description");
100 if (el != null) {
101 return el.text();
102 }
103 }
104
105 return null;
106 }
107
108 @Override
109 protected List<Entry<String, URL>> getChapters(Progress pg)
110 throws IOException {
111 int i = 1;
112 String jsonUrl = getJsonUrl();
113 if (jsonUrl != null) {
114 for (i = 1; true; i++) {
115 if (i > 1) {
116 try {
117 // The API does not accept more than 2 request per sec,
118 // and asks us to limit at one per sec when possible
119 Thread.sleep(1000);
120 } catch (InterruptedException e) {
121 }
122 }
123
124 try {
125 JSONObject json = getJson(jsonUrl + "&page=" + i, false);
126 if (!json.has("posts"))
127 break;
128 JSONArray posts = json.getJSONArray("posts");
129 if (posts.isEmpty())
130 break;
131 } catch (Exception e) {
132 e.printStackTrace();
133 }
134 }
135
136 // The last page was empty:
137 i--;
138 }
139
140 // The pages and images are in reverse order on /posts/
141 List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>();
142 for (int page = i; page > 0; page--) {
143 chapters.add(new AbstractMap.SimpleEntry<String, URL>(
144 "Page " + Integer.toString(i - page + 1),
145 new URL(jsonUrl + "&page=" + page)));
146 }
147
148 return chapters;
149 }
150
151 @Override
152 protected String getChapterContent(URL chapUrl, int number, Progress pg)
153 throws IOException {
154 StringBuilder builder = new StringBuilder();
155
156 JSONObject json = getJson(chapUrl, false);
157 JSONArray postsArr = json.getJSONArray("posts");
158
159 // The pages and images are in reverse order on /posts/
160 List<JSONObject> posts = new ArrayList<JSONObject>(postsArr.length());
161 for (int i = postsArr.length() - 1; i >= 0; i--) {
162 Object o = postsArr.get(i);
163 if (o instanceof JSONObject)
164 posts.add((JSONObject) o);
165 }
166
167 for (JSONObject post : posts) {
168 if (!post.has("file"))
169 continue;
170 JSONObject file = post.getJSONObject("file");
171 if (!file.has("url"))
172 continue;
173
174 try {
175 String url = file.getString("url");
176 builder.append("[");
177 builder.append(url);
178 builder.append("]<br/>");
179 } catch (JSONException e) {
180 // Can be NULL if filtered
181 // When the value is NULL, we get an exception
182 // but the "has" method still returns true
183 Instance.getInstance().getTraceHandler()
184 .error("Cannot get image for chapter " + number + " of "
185 + getSource());
186 }
187 }
188
189 return builder.toString();
190 }
191
192 @Override
193 protected URL getCanonicalUrl(URL source) {
194 // Convert search-pools into proper pools
195 if (source.getPath().equals("/posts") && source.getQuery() != null
196 && source.getQuery().startsWith("tags=pool%3A")) {
197 String poolNumber = source.getQuery()
198 .substring("tags=pool%3A".length());
199 try {
200 Integer.parseInt(poolNumber);
201 String base = source.getProtocol() + "://" + source.getHost();
202 if (source.getPort() != -1) {
203 base = base + ":" + source.getPort();
204 }
205 source = new URL(base + "/pools/" + poolNumber);
206 } catch (NumberFormatException e) {
207 // Not a simple pool, skip
208 } catch (MalformedURLException e) {
209 // Cannot happen
210 }
211 }
212
213 if (isSetOriginalUrl(source)) {
214 try {
215 Document doc = DataUtil.load(Instance.getInstance().getCache()
216 .open(source, this, false), "UTF-8", source.toString());
217 for (Element shortname : doc
218 .getElementsByClass("set-shortname")) {
219 for (Element el : shortname.getElementsByTag("a")) {
220 if (!el.attr("href").isEmpty())
221 return new URL(el.absUrl("href"));
222 }
223 }
224 } catch (IOException e) {
225 Instance.getInstance().getTraceHandler().error(e);
226 }
227 }
228
229 if (isPool(source)) {
230 try {
231 return new URL(
232 source.toString().replace("/pool/show/", "/pools/"));
233 } catch (MalformedURLException e) {
234 }
235 }
236
237 return super.getCanonicalUrl(source);
238 }
239
240 private String getTitle() {
241 String title = "";
242
243 Element el = getSourceNode().getElementsByTag("title").first();
244 if (el != null) {
245 title = el.text().trim();
246 }
247
248 for (String s : new String[] { "e621", "-", "e621", "Pool", "-" }) {
249 if (title.startsWith(s)) {
250 title = title.substring(s.length()).trim();
251 }
252 if (title.endsWith(s)) {
253 title = title.substring(0, title.length() - s.length()).trim();
254 }
255 }
256
257 if (isSearchOrSet(getSource())) {
258 title = title.isEmpty() ? "e621" : "[e621] " + title;
259 }
260
261 return title;
262 }
263
264 private String getAuthor() {
265 List<String> list = new ArrayList<String>();
266 String jsonUrl = getJsonUrl();
267 if (jsonUrl != null) {
268 try {
269 JSONObject json = getJson(jsonUrl, false);
270 JSONArray posts = json.getJSONArray("posts");
271 for (Object obj : posts) {
272 if (!(obj instanceof JSONObject))
273 continue;
274
275 JSONObject post = (JSONObject) obj;
276 if (!post.has("tags"))
277 continue;
278
279 JSONObject tags = post.getJSONObject("tags");
280 if (!tags.has("artist"))
281 continue;
282
283 JSONArray artists = tags.getJSONArray("artist");
284 for (Object artist : artists) {
285 if (list.contains(artist.toString()))
286 continue;
287
288 list.add(artist.toString());
289 }
290 }
291 } catch (Exception e) {
292 e.printStackTrace();
293 }
294 }
295
296 StringBuilder builder = new StringBuilder();
297 for (String artist : list) {
298 if (builder.length() > 0) {
299 builder.append(", ");
300 }
301 builder.append(artist);
302 }
303
304 return builder.toString();
305 }
306
307 private String getDate() {
308 String jsonUrl = getJsonUrl();
309 if (jsonUrl != null) {
310 try {
311 JSONObject json = getJson(jsonUrl, false);
312 JSONArray posts = json.getJSONArray("posts");
313 for (Object obj : posts) {
314 if (!(obj instanceof JSONObject))
315 continue;
316
317 JSONObject post = (JSONObject) obj;
318 if (!post.has("created_at"))
319 continue;
320
321 return post.getString("created_at");
322 }
323 } catch (Exception e) {
324 e.printStackTrace();
325 }
326 }
327
328 return "";
329 }
330
331 // no tags for pools
332 private List<String> getTags() {
333 List<String> tags = new ArrayList<String>();
334 if (isSearchOrSet(getSource())) {
335 String str = getTagsFromUrl(getSource());
336 for (String tag : str.split("\\+")) {
337 try {
338 tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
339 } catch (UnsupportedEncodingException e) {
340 }
341 }
342 }
343
344 return tags;
345 }
346
347 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
348 private String getTagsFromUrl(URL url) {
349 String tags = url == null ? "" : url.getQuery();
350 int pos = tags.indexOf("tags=");
351
352 if (pos >= 0) {
353 tags = tags.substring(pos).substring("tags=".length());
354 } else {
355 return "";
356 }
357
358 pos = tags.indexOf('&');
359 if (pos > 0) {
360 tags = tags.substring(0, pos);
361 }
362 pos = tags.indexOf('/');
363 if (pos > 0) {
364 tags = tags.substring(0, pos);
365 }
366
367 return tags;
368 }
369
370 private Image getCover() throws IOException {
371 Image image = null;
372 List<Entry<String, URL>> chapters = getChapters(null);
373 if (!chapters.isEmpty()) {
374 URL chap1Url = chapters.get(0).getValue();
375 String imgsChap1 = getChapterContent(chap1Url, 1, null);
376 if (!imgsChap1.isEmpty()) {
377 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
378 image = bsImages.getImage(this, new URL(imgsChap1));
379 }
380 }
381
382 return image;
383 }
384
385 // always /posts.json/ url
386 private String getJsonUrl() {
387 String url = null;
388 if (isSearchOrSet(getSource())) {
389 url = getSource().toString().replace("/posts", "/posts.json");
390 }
391
392 if (isPool(getSource())) {
393 String poolNumber = getSource().getPath()
394 .substring("/pools/".length());
395 url = "https://e621.net/posts.json" + "?tags=pool%3A" + poolNumber;
396 }
397
398 if (url != null) {
399 // Note: one way to override the blacklist
400 String login = Instance.getInstance().getConfig()
401 .getString(Config.LOGIN_E621_LOGIN);
402 String apk = Instance.getInstance().getConfig()
403 .getString(Config.LOGIN_E621_APIKEY);
404
405 if (login != null && !login.isEmpty() && apk != null
406 && !apk.isEmpty()) {
407 url = String.format("%s&login=%s&api_key=%s&_client=%s", url,
408 login, apk, "fanfix-" + Version.getCurrentVersion());
409 }
410 }
411
412 return url;
413 }
414
415 // note: will be removed at getCanonicalUrl()
416 private boolean isSetOriginalUrl(URL originalUrl) {
417 return originalUrl.getPath().startsWith("/post_sets/");
418 }
419
420 private boolean isPool(URL url) {
421 return url.getPath().startsWith("/pools/")
422 || url.getPath().startsWith("/pool/show/");
423 }
424
425 // set will be renamed into search by canonical url
426 private boolean isSearchOrSet(URL url) {
427 return
428 // search:
429 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
430 // or set:
431 || isSetOriginalUrl(url);
432 }
433 }