Fix remote timeout (try1) + reverse e621 /post/
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
4import java.io.InputStream;
b5e9855b 5import java.io.UnsupportedEncodingException;
9b863b20 6import java.net.MalformedURLException;
08fe2e33 7import java.net.URL;
b5e9855b
NR
8import java.net.URLDecoder;
9import java.net.URLEncoder;
ce297a79 10import java.util.AbstractMap;
08fe2e33 11import java.util.ArrayList;
9b863b20
NR
12import java.util.Collection;
13import java.util.Collections;
b5e9855b 14import java.util.LinkedList;
08fe2e33 15import java.util.List;
b5e9855b 16import java.util.AbstractMap.SimpleEntry;
08fe2e33
NR
17import java.util.Map.Entry;
18import java.util.Scanner;
19
20import be.nikiroo.fanfix.Instance;
21import be.nikiroo.fanfix.data.Chapter;
68686a37 22import be.nikiroo.fanfix.data.MetaData;
08fe2e33 23import be.nikiroo.fanfix.data.Story;
16a81ef7 24import be.nikiroo.utils.Image;
3b2b638f 25import be.nikiroo.utils.Progress;
08fe2e33
NR
26import be.nikiroo.utils.StringUtils;
27
28/**
29 * Support class for <a href="http://e621.net/">e621.net</a> and <a
30 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
31 * including some of MLP.
32 * <p>
33 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
34 * comics, but it can be difficult to browse.
35 *
36 * @author niki
37 */
0ffa4754 38class E621 extends BasicSupport_Deprecated {
08fe2e33
NR
39 @Override
40 public String getSourceName() {
41 return "e621.net";
42 }
43
44 @Override
68686a37
NR
45 protected MetaData getMeta(URL source, InputStream in) throws IOException {
46 MetaData meta = new MetaData();
47
48 meta.setTitle(getTitle(reset(in)));
49 meta.setAuthor(getAuthor(source, reset(in)));
50 meta.setDate("");
b5e9855b 51 meta.setTags(getTags(source, reset(in), false));
68686a37 52 meta.setSource(getSourceName());
2206ef66 53 meta.setUrl(source.toString());
68686a37
NR
54 meta.setPublisher(getSourceName());
55 meta.setUuid(source.toString());
56 meta.setLuid("");
276f95c6 57 meta.setLang("en");
a4143cd7 58 meta.setSubject("Furry");
68686a37
NR
59 meta.setType(getType().toString());
60 meta.setImageDocument(true);
b5e9855b 61 meta.setCover(getCover(source, reset(in)));
a9eb3f46 62 meta.setFakeCover(true);
68686a37
NR
63
64 return meta;
08fe2e33
NR
65 }
66
b5e9855b
NR
67 private List<String> getTags(URL source, InputStream in, boolean authors) {
68 List<String> tags = new ArrayList<String>();
69
70 if (isSearch(source)) {
71 String tagLine = getLine(in, "id=\"tag-sidebar\"", 1);
72 if (tagLine != null) {
73 String key = "href=\"";
74 for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine
75 .indexOf(key, pos + 1)) {
76 int end = tagLine.indexOf("\"", pos + key.length());
77 if (end >= 0) {
78 String href = tagLine.substring(pos, end);
79 String subkey;
80 if (authors)
81 subkey = "?name=";
82 else
83 subkey = "?title=";
84 if (href.contains(subkey)) {
85 String tag = href.substring(href.indexOf(subkey)
86 + subkey.length());
87 try {
88 tags.add(URLDecoder.decode(tag, "UTF-8"));
89 } catch (UnsupportedEncodingException e) {
90 // supported JVMs must have UTF-8 support
91 e.printStackTrace();
92 }
93 }
94 }
95 }
96
97 }
98 }
99
100 return tags;
101 }
102
08fe2e33 103 @Override
92fb0719 104 public Story process(URL url, Progress pg) throws IOException {
08fe2e33 105 // There is no chapters on e621, just pagination...
92fb0719 106 Story story = super.process(url, pg);
08fe2e33
NR
107
108 Chapter only = new Chapter(1, null);
109 for (Chapter chap : story) {
110 only.getParagraphs().addAll(chap.getParagraphs());
111 }
112
113 story.getChapters().clear();
114 story.getChapters().add(only);
115
116 return story;
117 }
118
119 @Override
120 protected boolean supports(URL url) {
121 String host = url.getHost();
122 if (host.startsWith("www.")) {
123 host = host.substring("www.".length());
124 }
125
126 return ("e621.net".equals(host) || "e926.net".equals(host))
b5e9855b 127 && (isPool(url) || isSearch(url));
08fe2e33
NR
128 }
129
130 @Override
131 protected boolean isHtml() {
132 return true;
133 }
134
b5e9855b 135 private Image getCover(URL source, InputStream in) throws IOException {
678390e0
NR
136 URL urlForCover = source;
137 if (isPool(source)) {
138 urlForCover = new URL(source.toString() + "?page=1");
139 }
b5e9855b 140
678390e0 141 String images = getChapterContent(urlForCover, in, 1, null);
595dfa7a 142 if (!images.isEmpty()) {
406447a4 143 int pos = images.indexOf("<br/>");
595dfa7a
NR
144 if (pos >= 0) {
145 images = images.substring(1, pos - 1);
146 return getImage(this, null, images);
147 }
148 }
149
150 return null;
151 }
152
211f7ddb 153 private String getAuthor(URL source, InputStream in) {
b5e9855b
NR
154 if (isSearch(source)) {
155 StringBuilder builder = new StringBuilder();
156 for (String author : getTags(source, in, true)) {
157 if (builder.length() > 0)
158 builder.append(", ");
159 builder.append(author);
160 }
161
162 return builder.toString();
163 }
164
08fe2e33
NR
165 String author = getLine(in, "href=\"/post/show/", 0);
166 if (author != null) {
167 String key = "href=\"";
168 int pos = author.indexOf(key);
169 if (pos >= 0) {
170 author = author.substring(pos + key.length());
171 pos = author.indexOf("\"");
172 if (pos >= 0) {
173 author = author.substring(0, pos - 1);
174 String page = source.getProtocol() + "://"
175 + source.getHost() + author;
08fe2e33 176 try {
7d0d2be6
NR
177 InputStream pageIn = Instance.getCache().open(
178 new URL(page), this, false);
179 try {
180 key = "class=\"tag-type-artist\"";
181 author = getLine(pageIn, key, 0);
182 if (author != null) {
183 pos = author.indexOf("<a href=\"");
08fe2e33 184 if (pos >= 0) {
7d0d2be6
NR
185 author = author.substring(pos);
186 pos = author.indexOf("</a>");
187 if (pos >= 0) {
188 author = author.substring(0, pos);
189 return StringUtils.unhtml(author);
190 }
08fe2e33
NR
191 }
192 }
7d0d2be6
NR
193 } finally {
194 pageIn.close();
08fe2e33 195 }
7d0d2be6
NR
196 } catch (Exception e) {
197 // No author found
08fe2e33
NR
198 }
199 }
200 }
201 }
202
203 return null;
204 }
205
211f7ddb 206 private String getTitle(InputStream in) {
08fe2e33
NR
207 String title = getLine(in, "<title>", 0);
208 if (title != null) {
209 int pos = title.indexOf('>');
210 if (pos >= 0) {
211 title = title.substring(pos + 1);
212 pos = title.indexOf('<');
213 if (pos >= 0) {
214 title = title.substring(0, pos);
215 }
216 }
217
218 if (title.startsWith("Pool:")) {
219 title = title.substring("Pool:".length());
220 }
221
68686a37 222 title = StringUtils.unhtml(title).trim();
08fe2e33
NR
223 }
224
225 return title;
226 }
227
228 @Override
229 protected String getDesc(URL source, InputStream in) throws IOException {
230 String desc = getLine(in, "margin-bottom: 2em;", 0);
231
232 if (desc != null) {
233 StringBuilder builder = new StringBuilder();
234
235 boolean inTags = false;
236 for (char car : desc.toCharArray()) {
237 if ((inTags && car == '>') || (!inTags && car == '<')) {
238 inTags = !inTags;
239 }
240
241 if (inTags) {
242 builder.append(car);
243 }
244 }
245
246 return builder.toString().trim();
247 }
248
249 return null;
250 }
251
252 @Override
ed08c171
NR
253 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
254 Progress pg) throws IOException {
b5e9855b
NR
255 if (isPool(source)) {
256 return getChaptersPool(source, in, pg);
257 } else if (isSearch(source)) {
258 return getChaptersSearch(source, in, pg);
259 }
260
261 return new LinkedList<Entry<String, URL>>();
262 }
263
264 private List<Entry<String, URL>> getChaptersSearch(URL source,
265 InputStream in, Progress pg) throws IOException {
266 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
267
268 String search = source.getPath();
269 if (search.endsWith("/")) {
270 search = search.substring(0, search.length() - 1);
271 }
272
273 int pos = search.lastIndexOf('/');
274 if (pos >= 0) {
275 search = search.substring(pos + 1);
276 }
277
278 String baseUrl = "https://e621.net/post/index/";
279 if (source.getHost().contains("e926")) {
280 baseUrl = baseUrl.replace("e621", "e926");
281 }
282
283 for (int i = 1; true; i++) {
284 URL url = new URL(baseUrl + i + "/" + search + "/");
285 try {
286 InputStream pageI = Instance.getCache().open(url, this, false);
287 try {
288 if (getLine(pageI, "No posts matched your search.", 0) != null)
289 break;
9b863b20
NR
290 urls.add(new AbstractMap.SimpleEntry<String, URL>("Page "
291 + Integer.toString(i), url));
b5e9855b
NR
292 } finally {
293 pageI.close();
294 }
295 } catch (Exception e) {
296 break;
297 }
298 }
299
9b863b20
NR
300 // They are sorted in reverse order on the website
301 Collections.reverse(urls);
b5e9855b
NR
302 return urls;
303 }
304
305 private List<Entry<String, URL>> getChaptersPool(URL source,
306 InputStream in, Progress pg) throws IOException {
08fe2e33
NR
307 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
308 int last = 1; // no pool/show when only one page
309
310 @SuppressWarnings("resource")
311 Scanner scan = new Scanner(in, "UTF-8");
312 scan.useDelimiter("\\n");
313 while (scan.hasNext()) {
314 String line = scan.next();
315 for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
316 .indexOf(source.getPath(), pos + source.getPath().length())) {
317 int equalPos = line.indexOf("=", pos);
318 int quotePos = line.indexOf("\"", pos);
319 if (equalPos >= 0 && quotePos > equalPos) {
320 String snum = line.substring(equalPos + 1, quotePos);
321 try {
322 int num = Integer.parseInt(snum);
323 if (num > last) {
324 last = num;
325 }
326 } catch (NumberFormatException e) {
327 }
328 }
329 }
330 }
331
332 for (int i = 1; i <= last; i++) {
ce297a79
NR
333 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
334 .toString(i), new URL(source.toString() + "?page=" + i)));
08fe2e33
NR
335 }
336
337 return urls;
338 }
339
340 @Override
ed08c171
NR
341 protected String getChapterContent(URL source, InputStream in, int number,
342 Progress pg) throws IOException {
08fe2e33
NR
343 StringBuilder builder = new StringBuilder();
344 String staticSite = "https://static1.e621.net";
345 if (source.getHost().contains("e926")) {
346 staticSite = staticSite.replace("e621", "e926");
347 }
348
349 String key = staticSite + "/data/preview/";
350
351 @SuppressWarnings("resource")
352 Scanner scan = new Scanner(in, "UTF-8");
353 scan.useDelimiter("\\n");
354 while (scan.hasNext()) {
355 String line = scan.next();
d98a2900 356 if (line.contains("class=\"preview")) {
08fe2e33
NR
357 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
358 key, pos + key.length())) {
359 int endPos = line.indexOf("\"", pos);
360 if (endPos >= 0) {
361 String id = line.substring(pos + key.length(), endPos);
362 id = staticSite + "/data/" + id;
363
364 int dotPos = id.lastIndexOf(".");
365 if (dotPos >= 0) {
366 id = id.substring(0, dotPos);
367 builder.append("[");
368 builder.append(id);
406447a4 369 builder.append("]<br/>");
08fe2e33
NR
370 }
371 }
372 }
373 }
374 }
375
376 return builder.toString();
377 }
b5e9855b 378
9b863b20
NR
379 @Override
380 protected URL getCanonicalUrl(URL source) {
381 if (isSearch(source)) {
382 // /post?tags=tag1+tag2 -> ../post/index/1/tag1%32tag2
383 String key = "post?tags=";
384 if (source.toString().contains(key)) {
385 int pos = source.toString().indexOf(key);
386 String tags = source.toString().substring(pos + key.length());
387 tags = tags.replace("+", "%32");
388 try {
389 return new URL(source.toString().substring(0, pos)
390 + "post/index/1/" + tags);
391 } catch (MalformedURLException e) {
392 Instance.getTraceHandler().error(e);
393 }
394 }
395 }
396 return super.getCanonicalUrl(source);
397 }
398
b5e9855b
NR
399 private boolean isPool(URL url) {
400 return url.getPath().startsWith("/pool/");
401 }
402
403 private boolean isSearch(URL url) {
9b863b20
NR
404 return url.getPath().startsWith("/post/index/")
405 || (url.getPath().equals("/post") && url.getQuery().startsWith(
406 "tags="));
b5e9855b 407 }
08fe2e33 408}