Try 3
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.UnsupportedEncodingException;
6 import java.net.URL;
7 import java.net.URLDecoder;
8 import java.net.URLEncoder;
9 import java.util.AbstractMap;
10 import java.util.ArrayList;
11 import java.util.LinkedList;
12 import java.util.List;
13 import java.util.AbstractMap.SimpleEntry;
14 import java.util.Map.Entry;
15 import java.util.Scanner;
16
17 import be.nikiroo.fanfix.Instance;
18 import be.nikiroo.fanfix.data.Chapter;
19 import be.nikiroo.fanfix.data.MetaData;
20 import be.nikiroo.fanfix.data.Story;
21 import be.nikiroo.utils.Image;
22 import be.nikiroo.utils.Progress;
23 import be.nikiroo.utils.StringUtils;
24
25 /**
26 * Support class for <a href="http://e621.net/">e621.net</a> and <a
27 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
28 * including some of MLP.
29 * <p>
30 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
31 * comics, but it can be difficult to browse.
32 *
33 * @author niki
34 */
35 class E621 extends BasicSupport_Deprecated {
36 @Override
37 public String getSourceName() {
38 return "e621.net";
39 }
40
41 @Override
42 protected MetaData getMeta(URL source, InputStream in) throws IOException {
43 MetaData meta = new MetaData();
44
45 meta.setTitle(getTitle(reset(in)));
46 meta.setAuthor(getAuthor(source, reset(in)));
47 meta.setDate("");
48 meta.setTags(getTags(source, reset(in), false));
49 meta.setSource(getSourceName());
50 meta.setUrl(source.toString());
51 meta.setPublisher(getSourceName());
52 meta.setUuid(source.toString());
53 meta.setLuid("");
54 meta.setLang("en");
55 meta.setSubject("Furry");
56 meta.setType(getType().toString());
57 meta.setImageDocument(true);
58 meta.setCover(getCover(source, reset(in)));
59 meta.setFakeCover(true);
60
61 System.out.println("Meta from e621: "+meta);
62
63 return meta;
64 }
65
66 private List<String> getTags(URL source, InputStream in, boolean authors) {
67 List<String> tags = new ArrayList<String>();
68
69 if (isSearch(source)) {
70 String tagLine = getLine(in, "id=\"tag-sidebar\"", 1);
71 if (tagLine != null) {
72 String key = "href=\"";
73 for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine
74 .indexOf(key, pos + 1)) {
75 int end = tagLine.indexOf("\"", pos + key.length());
76 if (end >= 0) {
77 String href = tagLine.substring(pos, end);
78 String subkey;
79 if (authors)
80 subkey = "?name=";
81 else
82 subkey = "?title=";
83 if (href.contains(subkey)) {
84 String tag = href.substring(href.indexOf(subkey)
85 + subkey.length());
86 try {
87 tags.add(URLDecoder.decode(tag, "UTF-8"));
88 } catch (UnsupportedEncodingException e) {
89 // supported JVMs must have UTF-8 support
90 e.printStackTrace();
91 }
92 }
93 }
94 }
95
96 }
97 }
98
99 return tags;
100 }
101
102 @Override
103 public Story process(URL url, Progress pg) throws IOException {
104 // There is no chapters on e621, just pagination...
105 Story story = super.process(url, pg);
106
107 Chapter only = new Chapter(1, null);
108 for (Chapter chap : story) {
109 only.getParagraphs().addAll(chap.getParagraphs());
110 }
111
112 story.getChapters().clear();
113 story.getChapters().add(only);
114
115 return story;
116 }
117
118 @Override
119 protected boolean supports(URL url) {
120 String host = url.getHost();
121 if (host.startsWith("www.")) {
122 host = host.substring("www.".length());
123 }
124
125 return ("e621.net".equals(host) || "e926.net".equals(host))
126 && (isPool(url) || isSearch(url));
127 }
128
129 @Override
130 protected boolean isHtml() {
131 return true;
132 }
133
134 private Image getCover(URL source, InputStream in) throws IOException {
135 URL urlForCover = source;
136 if (isPool(source)) {
137 urlForCover = new URL(source.toString() + "?page=1");
138 }
139
140 String images = getChapterContent(urlForCover, in, 1, null);
141 if (!images.isEmpty()) {
142 int pos = images.indexOf("<br/>");
143 if (pos >= 0) {
144 images = images.substring(1, pos - 1);
145 return getImage(this, null, images);
146 }
147 }
148
149 return null;
150 }
151
152 private String getAuthor(URL source, InputStream in) {
153 if (isSearch(source)) {
154 StringBuilder builder = new StringBuilder();
155 for (String author : getTags(source, in, true)) {
156 if (builder.length() > 0)
157 builder.append(", ");
158 builder.append(author);
159 }
160
161 return builder.toString();
162 }
163
164 String author = getLine(in, "href=\"/post/show/", 0);
165 if (author != null) {
166 String key = "href=\"";
167 int pos = author.indexOf(key);
168 if (pos >= 0) {
169 author = author.substring(pos + key.length());
170 pos = author.indexOf("\"");
171 if (pos >= 0) {
172 author = author.substring(0, pos - 1);
173 String page = source.getProtocol() + "://"
174 + source.getHost() + author;
175 try {
176 InputStream pageIn = Instance.getCache().open(
177 new URL(page), this, false);
178 try {
179 key = "class=\"tag-type-artist\"";
180 author = getLine(pageIn, key, 0);
181 if (author != null) {
182 pos = author.indexOf("<a href=\"");
183 if (pos >= 0) {
184 author = author.substring(pos);
185 pos = author.indexOf("</a>");
186 if (pos >= 0) {
187 author = author.substring(0, pos);
188 return StringUtils.unhtml(author);
189 }
190 }
191 }
192 } finally {
193 pageIn.close();
194 }
195 } catch (Exception e) {
196 // No author found
197 }
198 }
199 }
200 }
201
202 return null;
203 }
204
205 private String getTitle(InputStream in) {
206 String title = getLine(in, "<title>", 0);
207 if (title != null) {
208 int pos = title.indexOf('>');
209 if (pos >= 0) {
210 title = title.substring(pos + 1);
211 pos = title.indexOf('<');
212 if (pos >= 0) {
213 title = title.substring(0, pos);
214 }
215 }
216
217 if (title.startsWith("Pool:")) {
218 title = title.substring("Pool:".length());
219 }
220
221 title = StringUtils.unhtml(title).trim();
222 }
223
224 return title;
225 }
226
227 @Override
228 protected String getDesc(URL source, InputStream in) throws IOException {
229 String desc = getLine(in, "margin-bottom: 2em;", 0);
230
231 if (desc != null) {
232 StringBuilder builder = new StringBuilder();
233
234 boolean inTags = false;
235 for (char car : desc.toCharArray()) {
236 if ((inTags && car == '>') || (!inTags && car == '<')) {
237 inTags = !inTags;
238 }
239
240 if (inTags) {
241 builder.append(car);
242 }
243 }
244
245 return builder.toString().trim();
246 }
247
248 return null;
249 }
250
251 @Override
252 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
253 Progress pg) throws IOException {
254 if (isPool(source)) {
255 return getChaptersPool(source, in, pg);
256 } else if (isSearch(source)) {
257 return getChaptersSearch(source, in, pg);
258 }
259
260 return new LinkedList<Entry<String, URL>>();
261 }
262
263 private List<Entry<String, URL>> getChaptersSearch(URL source,
264 InputStream in, Progress pg) throws IOException {
265 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
266
267 String search = source.getPath();
268 if (search.endsWith("/")) {
269 search = search.substring(0, search.length() - 1);
270 }
271
272 int pos = search.lastIndexOf('/');
273 if (pos >= 0) {
274 search = search.substring(pos + 1);
275 }
276
277 String baseUrl = "https://e621.net/post/index/";
278 if (source.getHost().contains("e926")) {
279 baseUrl = baseUrl.replace("e621", "e926");
280 }
281
282 for (int i = 1; true; i++) {
283 URL url = new URL(baseUrl + i + "/" + search + "/");
284 try {
285 InputStream pageI = Instance.getCache().open(url, this, false);
286 try {
287 if (getLine(pageI, "No posts matched your search.", 0) != null)
288 break;
289 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
290 .toString(i), url));
291 } finally {
292 pageI.close();
293 }
294 } catch (Exception e) {
295 break;
296 }
297 }
298
299 return urls;
300 }
301
302 private List<Entry<String, URL>> getChaptersPool(URL source,
303 InputStream in, Progress pg) throws IOException {
304 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
305 int last = 1; // no pool/show when only one page
306
307 @SuppressWarnings("resource")
308 Scanner scan = new Scanner(in, "UTF-8");
309 scan.useDelimiter("\\n");
310 while (scan.hasNext()) {
311 String line = scan.next();
312 for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
313 .indexOf(source.getPath(), pos + source.getPath().length())) {
314 int equalPos = line.indexOf("=", pos);
315 int quotePos = line.indexOf("\"", pos);
316 if (equalPos >= 0 && quotePos > equalPos) {
317 String snum = line.substring(equalPos + 1, quotePos);
318 try {
319 int num = Integer.parseInt(snum);
320 if (num > last) {
321 last = num;
322 }
323 } catch (NumberFormatException e) {
324 }
325 }
326 }
327 }
328
329 for (int i = 1; i <= last; i++) {
330 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
331 .toString(i), new URL(source.toString() + "?page=" + i)));
332 }
333
334 return urls;
335 }
336
337 @Override
338 protected String getChapterContent(URL source, InputStream in, int number,
339 Progress pg) throws IOException {
340 StringBuilder builder = new StringBuilder();
341 String staticSite = "https://static1.e621.net";
342 if (source.getHost().contains("e926")) {
343 staticSite = staticSite.replace("e621", "e926");
344 }
345
346 String key = staticSite + "/data/preview/";
347
348 @SuppressWarnings("resource")
349 Scanner scan = new Scanner(in, "UTF-8");
350 scan.useDelimiter("\\n");
351 while (scan.hasNext()) {
352 String line = scan.next();
353 if (line.contains("class=\"preview")) {
354 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
355 key, pos + key.length())) {
356 int endPos = line.indexOf("\"", pos);
357 if (endPos >= 0) {
358 String id = line.substring(pos + key.length(), endPos);
359 id = staticSite + "/data/" + id;
360
361 int dotPos = id.lastIndexOf(".");
362 if (dotPos >= 0) {
363 id = id.substring(0, dotPos);
364 builder.append("[");
365 builder.append(id);
366 builder.append("]<br/>");
367 }
368 }
369 }
370 }
371 }
372
373 return builder.toString();
374 }
375
376 private boolean isPool(URL url) {
377 return url.getPath().startsWith("/pool/");
378 }
379
380 private boolean isSearch(URL url) {
381 return url.getPath().startsWith("/post/index/");
382 }
383 }