Try 2
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.UnsupportedEncodingException;
6 import java.net.URL;
7 import java.net.URLDecoder;
8 import java.net.URLEncoder;
9 import java.util.AbstractMap;
10 import java.util.ArrayList;
11 import java.util.LinkedList;
12 import java.util.List;
13 import java.util.AbstractMap.SimpleEntry;
14 import java.util.Map.Entry;
15 import java.util.Scanner;
16
17 import be.nikiroo.fanfix.Instance;
18 import be.nikiroo.fanfix.data.Chapter;
19 import be.nikiroo.fanfix.data.MetaData;
20 import be.nikiroo.fanfix.data.Story;
21 import be.nikiroo.utils.Image;
22 import be.nikiroo.utils.Progress;
23 import be.nikiroo.utils.StringUtils;
24
25 /**
26 * Support class for <a href="http://e621.net/">e621.net</a> and <a
27 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
28 * including some of MLP.
29 * <p>
30 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
31 * comics, but it can be difficult to browse.
32 *
33 * @author niki
34 */
35 class E621 extends BasicSupport_Deprecated {
36 @Override
37 public String getSourceName() {
38 return "e621.net";
39 }
40
41 @Override
42 protected MetaData getMeta(URL source, InputStream in) throws IOException {
43 MetaData meta = new MetaData();
44
45 meta.setTitle(getTitle(reset(in)));
46 meta.setAuthor(getAuthor(source, reset(in)));
47 meta.setDate("");
48 meta.setTags(getTags(source, reset(in), false));
49 meta.setSource(getSourceName());
50 meta.setUrl(source.toString());
51 meta.setPublisher(getSourceName());
52 meta.setUuid(source.toString());
53 meta.setLuid("");
54 meta.setLang("en");
55 meta.setSubject("Furry");
56 meta.setType(getType().toString());
57 meta.setImageDocument(true);
58 meta.setCover(getCover(source, reset(in)));
59 meta.setFakeCover(true);
60
61 return meta;
62 }
63
64 private List<String> getTags(URL source, InputStream in, boolean authors) {
65 List<String> tags = new ArrayList<String>();
66
67 if (isSearch(source)) {
68 String tagLine = getLine(in, "id=\"tag-sidebar\"", 1);
69 if (tagLine != null) {
70 String key = "href=\"";
71 for (int pos = tagLine.indexOf(key); pos >= 0; pos = tagLine
72 .indexOf(key, pos + 1)) {
73 int end = tagLine.indexOf("\"", pos + key.length());
74 if (end >= 0) {
75 String href = tagLine.substring(pos, end);
76 String subkey;
77 if (authors)
78 subkey = "?name=";
79 else
80 subkey = "?title=";
81 if (href.contains(subkey)) {
82 String tag = href.substring(href.indexOf(subkey)
83 + subkey.length());
84 try {
85 tags.add(URLDecoder.decode(tag, "UTF-8"));
86 } catch (UnsupportedEncodingException e) {
87 // supported JVMs must have UTF-8 support
88 e.printStackTrace();
89 }
90 }
91 }
92 }
93
94 }
95 }
96
97 return tags;
98 }
99
100 @Override
101 public Story process(URL url, Progress pg) throws IOException {
102 // There is no chapters on e621, just pagination...
103 Story story = super.process(url, pg);
104
105 Chapter only = new Chapter(1, null);
106 for (Chapter chap : story) {
107 only.getParagraphs().addAll(chap.getParagraphs());
108 }
109
110 story.getChapters().clear();
111 story.getChapters().add(only);
112
113 return story;
114 }
115
116 @Override
117 protected boolean supports(URL url) {
118 String host = url.getHost();
119 if (host.startsWith("www.")) {
120 host = host.substring("www.".length());
121 }
122
123 return ("e621.net".equals(host) || "e926.net".equals(host))
124 && (isPool(url) || isSearch(url));
125 }
126
127 @Override
128 protected boolean isHtml() {
129 return true;
130 }
131
132 private Image getCover(URL source, InputStream in) throws IOException {
133 URL urlForCover = source;
134 if (isPool(source)) {
135 urlForCover = new URL(source.toString() + "?page=1");
136 }
137
138 String images = getChapterContent(urlForCover, in, 1, null);
139 if (!images.isEmpty()) {
140 int pos = images.indexOf("<br/>");
141 if (pos >= 0) {
142 images = images.substring(1, pos - 1);
143 return getImage(this, null, images);
144 }
145 }
146
147 return null;
148 }
149
150 private String getAuthor(URL source, InputStream in) {
151 if (isSearch(source)) {
152 StringBuilder builder = new StringBuilder();
153 for (String author : getTags(source, in, true)) {
154 if (builder.length() > 0)
155 builder.append(", ");
156 builder.append(author);
157 }
158
159 return builder.toString();
160 }
161
162 String author = getLine(in, "href=\"/post/show/", 0);
163 if (author != null) {
164 String key = "href=\"";
165 int pos = author.indexOf(key);
166 if (pos >= 0) {
167 author = author.substring(pos + key.length());
168 pos = author.indexOf("\"");
169 if (pos >= 0) {
170 author = author.substring(0, pos - 1);
171 String page = source.getProtocol() + "://"
172 + source.getHost() + author;
173 try {
174 InputStream pageIn = Instance.getCache().open(
175 new URL(page), this, false);
176 try {
177 key = "class=\"tag-type-artist\"";
178 author = getLine(pageIn, key, 0);
179 if (author != null) {
180 pos = author.indexOf("<a href=\"");
181 if (pos >= 0) {
182 author = author.substring(pos);
183 pos = author.indexOf("</a>");
184 if (pos >= 0) {
185 author = author.substring(0, pos);
186 return StringUtils.unhtml(author);
187 }
188 }
189 }
190 } finally {
191 pageIn.close();
192 }
193 } catch (Exception e) {
194 // No author found
195 }
196 }
197 }
198 }
199
200 return null;
201 }
202
203 private String getTitle(InputStream in) {
204 String title = getLine(in, "<title>", 0);
205 if (title != null) {
206 int pos = title.indexOf('>');
207 if (pos >= 0) {
208 title = title.substring(pos + 1);
209 pos = title.indexOf('<');
210 if (pos >= 0) {
211 title = title.substring(0, pos);
212 }
213 }
214
215 if (title.startsWith("Pool:")) {
216 title = title.substring("Pool:".length());
217 }
218
219 title = StringUtils.unhtml(title).trim();
220 }
221
222 return title;
223 }
224
225 @Override
226 protected String getDesc(URL source, InputStream in) throws IOException {
227 String desc = getLine(in, "margin-bottom: 2em;", 0);
228
229 if (desc != null) {
230 StringBuilder builder = new StringBuilder();
231
232 boolean inTags = false;
233 for (char car : desc.toCharArray()) {
234 if ((inTags && car == '>') || (!inTags && car == '<')) {
235 inTags = !inTags;
236 }
237
238 if (inTags) {
239 builder.append(car);
240 }
241 }
242
243 return builder.toString().trim();
244 }
245
246 return null;
247 }
248
249 @Override
250 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
251 Progress pg) throws IOException {
252 if (isPool(source)) {
253 return getChaptersPool(source, in, pg);
254 } else if (isSearch(source)) {
255 return getChaptersSearch(source, in, pg);
256 }
257
258 return new LinkedList<Entry<String, URL>>();
259 }
260
261 private List<Entry<String, URL>> getChaptersSearch(URL source,
262 InputStream in, Progress pg) throws IOException {
263 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
264
265 String search = source.getPath();
266 if (search.endsWith("/")) {
267 search = search.substring(0, search.length() - 1);
268 }
269
270 int pos = search.lastIndexOf('/');
271 if (pos >= 0) {
272 search = search.substring(pos + 1);
273 }
274
275 String baseUrl = "https://e621.net/post/index/";
276 if (source.getHost().contains("e926")) {
277 baseUrl = baseUrl.replace("e621", "e926");
278 }
279
280 for (int i = 1; true; i++) {
281 URL url = new URL(baseUrl + i + "/" + search + "/");
282 try {
283 InputStream pageI = Instance.getCache().open(url, this, false);
284 try {
285 if (getLine(pageI, "No posts matched your search.", 0) != null)
286 break;
287 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
288 .toString(i), url));
289 } finally {
290 pageI.close();
291 }
292 } catch (Exception e) {
293 break;
294 }
295 }
296
297 return urls;
298 }
299
300 private List<Entry<String, URL>> getChaptersPool(URL source,
301 InputStream in, Progress pg) throws IOException {
302 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
303 int last = 1; // no pool/show when only one page
304
305 @SuppressWarnings("resource")
306 Scanner scan = new Scanner(in, "UTF-8");
307 scan.useDelimiter("\\n");
308 while (scan.hasNext()) {
309 String line = scan.next();
310 for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
311 .indexOf(source.getPath(), pos + source.getPath().length())) {
312 int equalPos = line.indexOf("=", pos);
313 int quotePos = line.indexOf("\"", pos);
314 if (equalPos >= 0 && quotePos > equalPos) {
315 String snum = line.substring(equalPos + 1, quotePos);
316 try {
317 int num = Integer.parseInt(snum);
318 if (num > last) {
319 last = num;
320 }
321 } catch (NumberFormatException e) {
322 }
323 }
324 }
325 }
326
327 for (int i = 1; i <= last; i++) {
328 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
329 .toString(i), new URL(source.toString() + "?page=" + i)));
330 }
331
332 return urls;
333 }
334
335 @Override
336 protected String getChapterContent(URL source, InputStream in, int number,
337 Progress pg) throws IOException {
338 StringBuilder builder = new StringBuilder();
339 String staticSite = "https://static1.e621.net";
340 if (source.getHost().contains("e926")) {
341 staticSite = staticSite.replace("e621", "e926");
342 }
343
344 String key = staticSite + "/data/preview/";
345
346 @SuppressWarnings("resource")
347 Scanner scan = new Scanner(in, "UTF-8");
348 scan.useDelimiter("\\n");
349 while (scan.hasNext()) {
350 String line = scan.next();
351 if (line.contains("class=\"preview")) {
352 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
353 key, pos + key.length())) {
354 int endPos = line.indexOf("\"", pos);
355 if (endPos >= 0) {
356 String id = line.substring(pos + key.length(), endPos);
357 id = staticSite + "/data/" + id;
358
359 int dotPos = id.lastIndexOf(".");
360 if (dotPos >= 0) {
361 id = id.substring(0, dotPos);
362 builder.append("[");
363 builder.append(id);
364 builder.append("]<br/>");
365 }
366 }
367 }
368 }
369 }
370
371 return builder.toString();
372 }
373
374 private boolean isPool(URL url) {
375 return url.getPath().startsWith("/pool/");
376 }
377
378 private boolean isSearch(URL url) {
379 return url.getPath().startsWith("/post/index/");
380 }
381 }