Merge branch 'subtree'
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.UnsupportedEncodingException;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.net.URLDecoder;
8 import java.util.AbstractMap;
9 import java.util.ArrayList;
10 import java.util.Date;
11 import java.util.LinkedList;
12 import java.util.List;
13 import java.util.Map.Entry;
14
15 import org.json.JSONArray;
16 import org.json.JSONException;
17 import org.json.JSONObject;
18 import org.jsoup.helper.DataUtil;
19 import org.jsoup.nodes.Document;
20 import org.jsoup.nodes.Element;
21
22 import be.nikiroo.fanfix.Instance;
23 import be.nikiroo.fanfix.bundles.Config;
24 import be.nikiroo.fanfix.data.MetaData;
25 import be.nikiroo.utils.Image;
26 import be.nikiroo.utils.Progress;
27 import be.nikiroo.utils.StringUtils;
28 import be.nikiroo.utils.Version;
29
30 /**
31 * Support class for <a href="http://e621.net/">e621.net</a> and
32 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
33 * including some of MLP.
34 * <p>
35 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
36 * comics, but it can be difficult to browse.
37 *
38 * @author niki
39 */
40 class E621 extends BasicSupport {
41 @Override
42 protected boolean supports(URL url) {
43 String host = url.getHost();
44 if (host.startsWith("www.")) {
45 host = host.substring("www.".length());
46 }
47
48 return ("e621.net".equals(host) || "e926.net".equals(host))
49 && (isPool(url) || isSearchOrSet(url));
50 }
51
52 @Override
53 protected boolean isHtml() {
54 return true;
55 }
56
57 @Override
58 protected MetaData getMeta() throws IOException {
59 MetaData meta = new MetaData();
60
61 meta.setTitle(getTitle());
62 meta.setAuthor(getAuthor());
63 meta.setDate(bsHelper.formatDate(getDate()));
64 meta.setTags(getTags());
65 meta.setSource(getType().getSourceName());
66 meta.setUrl(getSource().toString());
67 meta.setPublisher(getType().getSourceName());
68 meta.setUuid(getSource().toString());
69 meta.setLuid("");
70 meta.setLang("en");
71 meta.setSubject("Furry");
72 meta.setType(getType().toString());
73 meta.setImageDocument(true);
74 meta.setCover(getCover());
75 meta.setFakeCover(true);
76
77 return meta;
78 }
79
80 @Override
81 protected String getDesc() throws IOException {
82 if (isSearchOrSet(getSource())) {
83 StringBuilder builder = new StringBuilder();
84 builder.append("A collection of images from ")
85 .append(getSource().getHost()).append("\n") //
86 .append("\tTime of creation: "
87 + StringUtils.fromTime(new Date().getTime()))
88 .append("\n") //
89 .append("\tTags: ");//
90 for (String tag : getTags()) {
91 builder.append("\t\t").append(tag);
92 }
93
94 return builder.toString();
95 }
96
97 if (isPool(getSource())) {
98 Element el = getSourceNode().getElementById("description");
99 if (el != null) {
100 return el.text();
101 }
102 }
103
104 return null;
105 }
106
107 @Override
108 protected List<Entry<String, URL>> getChapters(Progress pg)
109 throws IOException {
110 int i = 1;
111 String jsonUrl = getJsonUrl();
112 if (jsonUrl != null) {
113 for (i = 1; true; i++) {
114 if (i > 1) {
115 try {
116 // The API does not accept more than 2 request per sec,
117 // and asks us to limit at one per sec when possible
118 Thread.sleep(1000);
119 } catch (InterruptedException e) {
120 }
121 }
122
123 try {
124 JSONObject json = getJson(jsonUrl + "&page=" + i, false);
125 if (!json.has("posts"))
126 break;
127 JSONArray posts = json.getJSONArray("posts");
128 if (posts.isEmpty())
129 break;
130 } catch (Exception e) {
131 e.printStackTrace();
132 }
133 }
134
135 // The last page was empty:
136 i--;
137 }
138
139 // The pages and images are in reverse order on /posts/
140 List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>();
141 for (int page = i; page > 0; page--) {
142 chapters.add(new AbstractMap.SimpleEntry<String, URL>(
143 "Page " + Integer.toString(i - page + 1),
144 new URL(jsonUrl + "&page=" + page)));
145 }
146
147 return chapters;
148 }
149
150 @Override
151 protected String getChapterContent(URL chapUrl, int number, Progress pg)
152 throws IOException {
153 StringBuilder builder = new StringBuilder();
154
155 JSONObject json = getJson(chapUrl, false);
156 JSONArray postsArr = json.getJSONArray("posts");
157
158 // The pages and images are in reverse order on /posts/
159 List<JSONObject> posts = new ArrayList<JSONObject>(postsArr.length());
160 for (int i = postsArr.length() - 1; i >= 0; i--) {
161 Object o = postsArr.get(i);
162 if (o instanceof JSONObject)
163 posts.add((JSONObject) o);
164 }
165
166 for (JSONObject post : posts) {
167 if (!post.has("file"))
168 continue;
169 JSONObject file = post.getJSONObject("file");
170 if (!file.has("url"))
171 continue;
172
173 try {
174 String url = file.getString("url");
175 builder.append("[");
176 builder.append(url);
177 builder.append("]<br/>");
178 } catch (JSONException e) {
179 // Can be NULL if filtered
180 // When the value is NULL, we get an exception
181 // but the "has" method still returns true
182 Instance.getInstance().getTraceHandler()
183 .error("Cannot get image for chapter " + number + " of "
184 + getSource());
185 }
186 }
187
188 return builder.toString();
189 }
190
191 @Override
192 protected URL getCanonicalUrl(URL source) {
193 // Convert search-pools into proper pools
194 if (source.getPath().equals("/posts") && source.getQuery() != null
195 && source.getQuery().startsWith("tags=pool%3A")) {
196 String poolNumber = source.getQuery()
197 .substring("tags=pool%3A".length());
198 try {
199 Integer.parseInt(poolNumber);
200 String base = source.getProtocol() + "://" + source.getHost();
201 if (source.getPort() != -1) {
202 base = base + ":" + source.getPort();
203 }
204 source = new URL(base + "/pools/" + poolNumber);
205 } catch (NumberFormatException e) {
206 // Not a simple pool, skip
207 } catch (MalformedURLException e) {
208 // Cannot happen
209 }
210 }
211
212 if (isSetOriginalUrl(source)) {
213 try {
214 Document doc = DataUtil.load(Instance.getInstance().getCache()
215 .open(source, this, false), "UTF-8", source.toString());
216 for (Element shortname : doc
217 .getElementsByClass("set-shortname")) {
218 for (Element el : shortname.getElementsByTag("a")) {
219 if (!el.attr("href").isEmpty())
220 return new URL(el.absUrl("href"));
221 }
222 }
223 } catch (IOException e) {
224 Instance.getInstance().getTraceHandler().error(e);
225 }
226 }
227
228 if (isPool(source)) {
229 try {
230 return new URL(
231 source.toString().replace("/pool/show/", "/pools/"));
232 } catch (MalformedURLException e) {
233 }
234 }
235
236 return super.getCanonicalUrl(source);
237 }
238
239 private String getTitle() {
240 String title = "";
241
242 Element el = getSourceNode().getElementsByTag("title").first();
243 if (el != null) {
244 title = el.text().trim();
245 }
246
247 for (String s : new String[] { "e621", "-", "e621", "Pool", "-" }) {
248 if (title.startsWith(s)) {
249 title = title.substring(s.length()).trim();
250 }
251 if (title.endsWith(s)) {
252 title = title.substring(0, title.length() - s.length()).trim();
253 }
254 }
255
256 if (isSearchOrSet(getSource())) {
257 title = title.isEmpty() ? "e621" : "[e621] " + title;
258 }
259
260 return title;
261 }
262
263 private String getAuthor() {
264 List<String> list = new ArrayList<String>();
265 String jsonUrl = getJsonUrl();
266 if (jsonUrl != null) {
267 try {
268 JSONObject json = getJson(jsonUrl, false);
269 JSONArray posts = json.getJSONArray("posts");
270 for (Object obj : posts) {
271 if (!(obj instanceof JSONObject))
272 continue;
273
274 JSONObject post = (JSONObject) obj;
275 if (!post.has("tags"))
276 continue;
277
278 JSONObject tags = post.getJSONObject("tags");
279 if (!tags.has("artist"))
280 continue;
281
282 JSONArray artists = tags.getJSONArray("artist");
283 for (Object artist : artists) {
284 if (list.contains(artist.toString()))
285 continue;
286
287 list.add(artist.toString());
288 }
289 }
290 } catch (Exception e) {
291 e.printStackTrace();
292 }
293 }
294
295 StringBuilder builder = new StringBuilder();
296 for (String artist : list) {
297 if (builder.length() > 0) {
298 builder.append(", ");
299 }
300 builder.append(artist);
301 }
302
303 return builder.toString();
304 }
305
306 private String getDate() {
307 String jsonUrl = getJsonUrl();
308 if (jsonUrl != null) {
309 try {
310 JSONObject json = getJson(jsonUrl, false);
311 JSONArray posts = json.getJSONArray("posts");
312 for (Object obj : posts) {
313 if (!(obj instanceof JSONObject))
314 continue;
315
316 JSONObject post = (JSONObject) obj;
317 if (!post.has("created_at"))
318 continue;
319
320 return post.getString("created_at");
321 }
322 } catch (Exception e) {
323 e.printStackTrace();
324 }
325 }
326
327 return "";
328 }
329
330 // no tags for pools
331 private List<String> getTags() {
332 List<String> tags = new ArrayList<String>();
333 if (isSearchOrSet(getSource())) {
334 String str = getTagsFromUrl(getSource());
335 for (String tag : str.split("\\+")) {
336 try {
337 tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
338 } catch (UnsupportedEncodingException e) {
339 }
340 }
341 }
342
343 return tags;
344 }
345
346 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
347 private String getTagsFromUrl(URL url) {
348 String tags = url == null ? "" : url.getQuery();
349 int pos = tags.indexOf("tags=");
350
351 if (pos >= 0) {
352 tags = tags.substring(pos).substring("tags=".length());
353 } else {
354 return "";
355 }
356
357 pos = tags.indexOf('&');
358 if (pos > 0) {
359 tags = tags.substring(0, pos);
360 }
361 pos = tags.indexOf('/');
362 if (pos > 0) {
363 tags = tags.substring(0, pos);
364 }
365
366 return tags;
367 }
368
369 private Image getCover() throws IOException {
370 Image image = null;
371 List<Entry<String, URL>> chapters = getChapters(null);
372 if (!chapters.isEmpty()) {
373 URL chap1Url = chapters.get(0).getValue();
374 String imgsChap1 = getChapterContent(chap1Url, 1, null);
375 if (!imgsChap1.isEmpty()) {
376 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
377 image = bsImages.getImage(this, new URL(imgsChap1));
378 }
379 }
380
381 return image;
382 }
383
384 // always /posts.json/ url
385 private String getJsonUrl() {
386 String url = null;
387 if (isSearchOrSet(getSource())) {
388 url = getSource().toString().replace("/posts", "/posts.json");
389 }
390
391 if (isPool(getSource())) {
392 String poolNumber = getSource().getPath()
393 .substring("/pools/".length());
394 url = "https://e621.net/posts.json" + "?tags=pool%3A" + poolNumber;
395 }
396
397 if (url != null) {
398 // Note: one way to override the blacklist
399 String login = Instance.getInstance().getConfig()
400 .getString(Config.LOGIN_E621_LOGIN);
401 String apk = Instance.getInstance().getConfig()
402 .getString(Config.LOGIN_E621_APIKEY);
403
404 if (login != null && !login.isEmpty() && apk != null
405 && !apk.isEmpty()) {
406 url = String.format("%s&login=%s&api_key=%s&_client=%s", url,
407 login, apk, "fanfix-" + Version.getCurrentVersion());
408 }
409 }
410
411 return url;
412 }
413
414 // note: will be removed at getCanonicalUrl()
415 private boolean isSetOriginalUrl(URL originalUrl) {
416 return originalUrl.getPath().startsWith("/post_sets/");
417 }
418
419 private boolean isPool(URL url) {
420 return url.getPath().startsWith("/pools/")
421 || url.getPath().startsWith("/pool/show/");
422 }
423
424 // set will be renamed into search by canonical url
425 private boolean isSearchOrSet(URL url) {
426 return
427 // search:
428 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
429 // or set:
430 || isSetOriginalUrl(url);
431 }
432 }