separate support name and BasicSupport
[fanfix.git] / src / be / nikiroo / fanfix / supported / E621.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
4import java.io.InputStream;
b5e9855b 5import java.io.UnsupportedEncodingException;
9b863b20 6import java.net.MalformedURLException;
08fe2e33 7import java.net.URL;
b5e9855b 8import java.net.URLDecoder;
ce297a79 9import java.util.AbstractMap;
08fe2e33 10import java.util.ArrayList;
9b863b20 11import java.util.Collections;
b5e9855b 12import java.util.LinkedList;
08fe2e33
NR
13import java.util.List;
14import java.util.Map.Entry;
15import java.util.Scanner;
16
17import be.nikiroo.fanfix.Instance;
18import be.nikiroo.fanfix.data.Chapter;
68686a37 19import be.nikiroo.fanfix.data.MetaData;
08fe2e33 20import be.nikiroo.fanfix.data.Story;
16a81ef7 21import be.nikiroo.utils.Image;
3b2b638f 22import be.nikiroo.utils.Progress;
08fe2e33
NR
23import be.nikiroo.utils.StringUtils;
24
25/**
26 * Support class for <a href="http://e621.net/">e621.net</a> and <a
27 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
28 * including some of MLP.
29 * <p>
30 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
31 * comics, but it can be difficult to browse.
32 *
33 * @author niki
34 */
0ffa4754 35class E621 extends BasicSupport_Deprecated {
08fe2e33 36 @Override
68686a37
NR
37 protected MetaData getMeta(URL source, InputStream in) throws IOException {
38 MetaData meta = new MetaData();
39
40 meta.setTitle(getTitle(reset(in)));
41 meta.setAuthor(getAuthor(source, reset(in)));
42 meta.setDate("");
b5e9855b 43 meta.setTags(getTags(source, reset(in), false));
727108fe 44 meta.setSource(getType().getSourceName());
2206ef66 45 meta.setUrl(source.toString());
727108fe 46 meta.setPublisher(getType().getSourceName());
68686a37
NR
47 meta.setUuid(source.toString());
48 meta.setLuid("");
276f95c6 49 meta.setLang("en");
a4143cd7 50 meta.setSubject("Furry");
68686a37
NR
51 meta.setType(getType().toString());
52 meta.setImageDocument(true);
b5e9855b 53 meta.setCover(getCover(source, reset(in)));
a9eb3f46 54 meta.setFakeCover(true);
68686a37
NR
55
56 return meta;
08fe2e33
NR
57 }
58
b5e9855b
NR
59 private List<String> getTags(URL source, InputStream in, boolean authors) {
60 List<String> tags = new ArrayList<String>();
61
62 if (isSearch(source)) {
63 String tagLine = getLine(in, "id=\"tag-sidebar\"", 1);
64 if (tagLine != null) {
65 String key = "href=\"";
66 for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine
67 .indexOf(key, pos + 1)) {
68 int end = tagLine.indexOf("\"", pos + key.length());
69 if (end >= 0) {
70 String href = tagLine.substring(pos, end);
71 String subkey;
72 if (authors)
73 subkey = "?name=";
74 else
75 subkey = "?title=";
76 if (href.contains(subkey)) {
77 String tag = href.substring(href.indexOf(subkey)
78 + subkey.length());
79 try {
80 tags.add(URLDecoder.decode(tag, "UTF-8"));
81 } catch (UnsupportedEncodingException e) {
82 // supported JVMs must have UTF-8 support
83 e.printStackTrace();
84 }
85 }
86 }
87 }
88
89 }
90 }
91
92 return tags;
93 }
94
08fe2e33 95 @Override
92fb0719 96 public Story process(URL url, Progress pg) throws IOException {
08fe2e33 97 // There is no chapters on e621, just pagination...
92fb0719 98 Story story = super.process(url, pg);
08fe2e33
NR
99
100 Chapter only = new Chapter(1, null);
101 for (Chapter chap : story) {
102 only.getParagraphs().addAll(chap.getParagraphs());
103 }
104
105 story.getChapters().clear();
106 story.getChapters().add(only);
107
108 return story;
109 }
110
111 @Override
112 protected boolean supports(URL url) {
113 String host = url.getHost();
114 if (host.startsWith("www.")) {
115 host = host.substring("www.".length());
116 }
117
118 return ("e621.net".equals(host) || "e926.net".equals(host))
b5e9855b 119 && (isPool(url) || isSearch(url));
08fe2e33
NR
120 }
121
122 @Override
123 protected boolean isHtml() {
124 return true;
125 }
126
b5e9855b 127 private Image getCover(URL source, InputStream in) throws IOException {
678390e0
NR
128 URL urlForCover = source;
129 if (isPool(source)) {
130 urlForCover = new URL(source.toString() + "?page=1");
131 }
b5e9855b 132
678390e0 133 String images = getChapterContent(urlForCover, in, 1, null);
595dfa7a 134 if (!images.isEmpty()) {
406447a4 135 int pos = images.indexOf("<br/>");
595dfa7a
NR
136 if (pos >= 0) {
137 images = images.substring(1, pos - 1);
138 return getImage(this, null, images);
139 }
140 }
141
142 return null;
143 }
144
211f7ddb 145 private String getAuthor(URL source, InputStream in) {
b5e9855b
NR
146 if (isSearch(source)) {
147 StringBuilder builder = new StringBuilder();
148 for (String author : getTags(source, in, true)) {
149 if (builder.length() > 0)
150 builder.append(", ");
151 builder.append(author);
152 }
153
154 return builder.toString();
155 }
156
08fe2e33
NR
157 String author = getLine(in, "href=\"/post/show/", 0);
158 if (author != null) {
159 String key = "href=\"";
160 int pos = author.indexOf(key);
161 if (pos >= 0) {
162 author = author.substring(pos + key.length());
163 pos = author.indexOf("\"");
164 if (pos >= 0) {
165 author = author.substring(0, pos - 1);
166 String page = source.getProtocol() + "://"
167 + source.getHost() + author;
08fe2e33 168 try {
7d0d2be6
NR
169 InputStream pageIn = Instance.getCache().open(
170 new URL(page), this, false);
171 try {
172 key = "class=\"tag-type-artist\"";
173 author = getLine(pageIn, key, 0);
174 if (author != null) {
175 pos = author.indexOf("<a href=\"");
08fe2e33 176 if (pos >= 0) {
7d0d2be6
NR
177 author = author.substring(pos);
178 pos = author.indexOf("</a>");
179 if (pos >= 0) {
180 author = author.substring(0, pos);
181 return StringUtils.unhtml(author);
182 }
08fe2e33
NR
183 }
184 }
7d0d2be6
NR
185 } finally {
186 pageIn.close();
08fe2e33 187 }
7d0d2be6
NR
188 } catch (Exception e) {
189 // No author found
08fe2e33
NR
190 }
191 }
192 }
193 }
194
195 return null;
196 }
197
211f7ddb 198 private String getTitle(InputStream in) {
08fe2e33
NR
199 String title = getLine(in, "<title>", 0);
200 if (title != null) {
201 int pos = title.indexOf('>');
202 if (pos >= 0) {
203 title = title.substring(pos + 1);
204 pos = title.indexOf('<');
205 if (pos >= 0) {
206 title = title.substring(0, pos);
207 }
208 }
209
210 if (title.startsWith("Pool:")) {
211 title = title.substring("Pool:".length());
212 }
213
68686a37 214 title = StringUtils.unhtml(title).trim();
08fe2e33
NR
215 }
216
217 return title;
218 }
219
220 @Override
221 protected String getDesc(URL source, InputStream in) throws IOException {
222 String desc = getLine(in, "margin-bottom: 2em;", 0);
223
224 if (desc != null) {
225 StringBuilder builder = new StringBuilder();
226
227 boolean inTags = false;
228 for (char car : desc.toCharArray()) {
229 if ((inTags && car == '>') || (!inTags && car == '<')) {
230 inTags = !inTags;
231 }
232
233 if (inTags) {
234 builder.append(car);
235 }
236 }
237
238 return builder.toString().trim();
239 }
240
241 return null;
242 }
243
244 @Override
ed08c171
NR
245 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
246 Progress pg) throws IOException {
b5e9855b
NR
247 if (isPool(source)) {
248 return getChaptersPool(source, in, pg);
249 } else if (isSearch(source)) {
250 return getChaptersSearch(source, in, pg);
251 }
252
253 return new LinkedList<Entry<String, URL>>();
254 }
255
256 private List<Entry<String, URL>> getChaptersSearch(URL source,
257 InputStream in, Progress pg) throws IOException {
258 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
259
260 String search = source.getPath();
261 if (search.endsWith("/")) {
262 search = search.substring(0, search.length() - 1);
263 }
264
265 int pos = search.lastIndexOf('/');
266 if (pos >= 0) {
267 search = search.substring(pos + 1);
268 }
269
270 String baseUrl = "https://e621.net/post/index/";
271 if (source.getHost().contains("e926")) {
272 baseUrl = baseUrl.replace("e621", "e926");
273 }
274
275 for (int i = 1; true; i++) {
276 URL url = new URL(baseUrl + i + "/" + search + "/");
277 try {
278 InputStream pageI = Instance.getCache().open(url, this, false);
279 try {
280 if (getLine(pageI, "No posts matched your search.", 0) != null)
281 break;
9b863b20
NR
282 urls.add(new AbstractMap.SimpleEntry<String, URL>("Page "
283 + Integer.toString(i), url));
b5e9855b
NR
284 } finally {
285 pageI.close();
286 }
287 } catch (Exception e) {
288 break;
289 }
290 }
291
9b863b20
NR
292 // They are sorted in reverse order on the website
293 Collections.reverse(urls);
b5e9855b
NR
294 return urls;
295 }
296
297 private List<Entry<String, URL>> getChaptersPool(URL source,
298 InputStream in, Progress pg) throws IOException {
08fe2e33
NR
299 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
300 int last = 1; // no pool/show when only one page
301
302 @SuppressWarnings("resource")
303 Scanner scan = new Scanner(in, "UTF-8");
304 scan.useDelimiter("\\n");
305 while (scan.hasNext()) {
306 String line = scan.next();
307 for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
308 .indexOf(source.getPath(), pos + source.getPath().length())) {
309 int equalPos = line.indexOf("=", pos);
310 int quotePos = line.indexOf("\"", pos);
311 if (equalPos >= 0 && quotePos > equalPos) {
312 String snum = line.substring(equalPos + 1, quotePos);
313 try {
314 int num = Integer.parseInt(snum);
315 if (num > last) {
316 last = num;
317 }
318 } catch (NumberFormatException e) {
319 }
320 }
321 }
322 }
323
324 for (int i = 1; i <= last; i++) {
ce297a79
NR
325 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
326 .toString(i), new URL(source.toString() + "?page=" + i)));
08fe2e33
NR
327 }
328
329 return urls;
330 }
331
332 @Override
ed08c171
NR
333 protected String getChapterContent(URL source, InputStream in, int number,
334 Progress pg) throws IOException {
08fe2e33
NR
335 StringBuilder builder = new StringBuilder();
336 String staticSite = "https://static1.e621.net";
337 if (source.getHost().contains("e926")) {
338 staticSite = staticSite.replace("e621", "e926");
339 }
340
341 String key = staticSite + "/data/preview/";
342
343 @SuppressWarnings("resource")
344 Scanner scan = new Scanner(in, "UTF-8");
345 scan.useDelimiter("\\n");
346 while (scan.hasNext()) {
347 String line = scan.next();
d98a2900 348 if (line.contains("class=\"preview")) {
08fe2e33
NR
349 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
350 key, pos + key.length())) {
351 int endPos = line.indexOf("\"", pos);
352 if (endPos >= 0) {
353 String id = line.substring(pos + key.length(), endPos);
354 id = staticSite + "/data/" + id;
355
356 int dotPos = id.lastIndexOf(".");
357 if (dotPos >= 0) {
358 id = id.substring(0, dotPos);
359 builder.append("[");
360 builder.append(id);
406447a4 361 builder.append("]<br/>");
08fe2e33
NR
362 }
363 }
364 }
365 }
366 }
367
368 return builder.toString();
369 }
b5e9855b 370
9b863b20
NR
371 @Override
372 protected URL getCanonicalUrl(URL source) {
373 if (isSearch(source)) {
374 // /post?tags=tag1+tag2 -> ../post/index/1/tag1%32tag2
9948521d 375 String key = "?tags=";
9b863b20
NR
376 if (source.toString().contains(key)) {
377 int pos = source.toString().indexOf(key);
378 String tags = source.toString().substring(pos + key.length());
1822d603 379 tags = tags.replace("+", "%20");
9948521d
NR
380
381 String base = source.toString().substring(0, pos);
382 if (!base.endsWith("/")) {
383 base += "/";
384 }
385 if (base.endsWith("/search/")) {
386 base = base.substring(0, base.indexOf("/search/") + 1);
387 }
388
9b863b20 389 try {
9948521d 390 return new URL(base + "index/1/" + tags);
9b863b20
NR
391 } catch (MalformedURLException e) {
392 Instance.getTraceHandler().error(e);
393 }
394 }
395 }
9948521d 396
9b863b20
NR
397 return super.getCanonicalUrl(source);
398 }
399
b5e9855b
NR
400 private boolean isPool(URL url) {
401 return url.getPath().startsWith("/pool/");
402 }
403
404 private boolean isSearch(URL url) {
9b863b20 405 return url.getPath().startsWith("/post/index/")
14d8be1c
NR
406 || (url.getPath().equals("/post/search") && url.getQuery()
407 .startsWith("tags="));
b5e9855b 408 }
08fe2e33 409}