Commit | Line | Data |
---|---|---|
08fe2e33 NR |
1 | package be.nikiroo.fanfix.supported; |
2 | ||
3 | import java.io.IOException; | |
4 | import java.io.InputStream; | |
b5e9855b | 5 | import java.io.UnsupportedEncodingException; |
c4b18c94 | 6 | import java.net.MalformedURLException; |
08fe2e33 | 7 | import java.net.URL; |
b5e9855b | 8 | import java.net.URLDecoder; |
ce297a79 | 9 | import java.util.AbstractMap; |
08fe2e33 | 10 | import java.util.ArrayList; |
9b863b20 | 11 | import java.util.Collections; |
8ac3d099 | 12 | import java.util.Date; |
b5e9855b | 13 | import java.util.LinkedList; |
08fe2e33 NR |
14 | import java.util.List; |
15 | import java.util.Map.Entry; | |
8ac3d099 NR |
16 | |
17 | import org.jsoup.helper.DataUtil; | |
18 | import org.jsoup.nodes.Document; | |
19 | import org.jsoup.nodes.Element; | |
75002fcc | 20 | import org.jsoup.select.Elements; |
08fe2e33 NR |
21 | |
22 | import be.nikiroo.fanfix.Instance; | |
68686a37 | 23 | import be.nikiroo.fanfix.data.MetaData; |
8ac3d099 | 24 | import be.nikiroo.utils.IOUtils; |
16a81ef7 | 25 | import be.nikiroo.utils.Image; |
3b2b638f | 26 | import be.nikiroo.utils.Progress; |
08fe2e33 NR |
27 | import be.nikiroo.utils.StringUtils; |
28 | ||
29 | /** | |
8ac3d099 NR |
30 | * Support class for <a href="http://e621.net/">e621.net</a> and |
31 | * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics, | |
08fe2e33 NR |
32 | * including some of MLP. |
33 | * <p> | |
34 | * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and | |
35 | * comics, but it can be difficult to browse. | |
36 | * | |
37 | * @author niki | |
38 | */ | |
8ac3d099 | 39 | class E621 extends BasicSupport { |
08fe2e33 NR |
40 | @Override |
41 | protected boolean supports(URL url) { | |
42 | String host = url.getHost(); | |
43 | if (host.startsWith("www.")) { | |
44 | host = host.substring("www.".length()); | |
45 | } | |
46 | ||
8ac3d099 | 47 | return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url)); |
08fe2e33 NR |
48 | } |
49 | ||
50 | @Override | |
51 | protected boolean isHtml() { | |
52 | return true; | |
53 | } | |
54 | ||
8ac3d099 NR |
55 | @Override |
56 | protected MetaData getMeta() throws IOException { | |
57 | MetaData meta = new MetaData(); | |
b5e9855b | 58 | |
8ac3d099 NR |
59 | meta.setTitle(getTitle()); |
60 | meta.setAuthor(getAuthor()); | |
61 | meta.setDate(""); | |
62 | meta.setTags(getTags()); | |
63 | meta.setSource(getType().getSourceName()); | |
64 | meta.setUrl(getSource().toString()); | |
65 | meta.setPublisher(getType().getSourceName()); | |
66 | meta.setUuid(getSource().toString()); | |
67 | meta.setLuid(""); | |
68 | meta.setLang("en"); | |
69 | meta.setSubject("Furry"); | |
70 | meta.setType(getType().toString()); | |
71 | meta.setImageDocument(true); | |
72 | meta.setCover(getCover()); | |
73 | meta.setFakeCover(true); | |
595dfa7a | 74 | |
8ac3d099 | 75 | return meta; |
595dfa7a NR |
76 | } |
77 | ||
8ac3d099 NR |
78 | @Override |
79 | protected String getDesc() throws IOException { | |
80 | if (isSearchOrSet(getSource())) { | |
b5e9855b | 81 | StringBuilder builder = new StringBuilder(); |
8ac3d099 NR |
82 | builder.append("A collection of images from ").append(getSource().getHost()).append("\n") // |
83 | .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") // | |
84 | .append("\tTags: ");// | |
85 | for (String tag : getTags()) { | |
86 | builder.append("\t\t").append(tag); | |
b5e9855b NR |
87 | } |
88 | ||
89 | return builder.toString(); | |
90 | } | |
91 | ||
8ac3d099 NR |
92 | if (isPool(getSource())) { |
93 | Element el = getSourceNode().getElementById("description"); | |
94 | if (el != null) { | |
95 | return el.text(); | |
08fe2e33 NR |
96 | } |
97 | } | |
98 | ||
99 | return null; | |
100 | } | |
101 | ||
08fe2e33 | 102 | @Override |
8ac3d099 | 103 | protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException { |
75002fcc NR |
104 | List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>(); |
105 | ||
8ac3d099 NR |
106 | if (isPool(getSource())) { |
107 | String baseUrl = "https://e621.net/" + getSource().getPath() + "?page="; | |
75002fcc | 108 | chapters = getChapters(getSource(), pg, baseUrl, ""); |
8ac3d099 NR |
109 | } else if (isSearchOrSet(getSource())) { |
110 | String baseUrl = "https://e621.net/posts/?page="; | |
111 | String search = "&tags=" + getTagsFromUrl(getSource()); | |
75002fcc NR |
112 | |
113 | chapters = getChapters(getSource(), pg, | |
a351d69d | 114 | baseUrl, search); |
b5e9855b NR |
115 | } |
116 | ||
75002fcc NR |
117 | // sets and some pools are sorted in reverse order on the website |
118 | if (getSource().getPath().startsWith("/posts")) { | |
119 | Collections.reverse(chapters); | |
120 | } | |
121 | ||
122 | return chapters; | |
b5e9855b NR |
123 | } |
124 | ||
8ac3d099 NR |
125 | private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters) |
126 | throws IOException { | |
b5e9855b NR |
127 | List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>(); |
128 | ||
b5e9855b NR |
129 | if (source.getHost().contains("e926")) { |
130 | baseUrl = baseUrl.replace("e621", "e926"); | |
131 | } | |
132 | ||
133 | for (int i = 1; true; i++) { | |
8ac3d099 | 134 | URL url = new URL(baseUrl + i + parameters); |
b5e9855b | 135 | try { |
d66deb8d | 136 | InputStream pageI = Instance.getInstance().getCache().open(url, this, false); |
b5e9855b | 137 | try { |
8ac3d099 | 138 | if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) { |
b5e9855b | 139 | break; |
8ac3d099 NR |
140 | } |
141 | urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url)); | |
b5e9855b NR |
142 | } finally { |
143 | pageI.close(); | |
144 | } | |
145 | } catch (Exception e) { | |
146 | break; | |
147 | } | |
148 | } | |
149 | ||
150 | return urls; | |
151 | } | |
152 | ||
8ac3d099 NR |
153 | @Override |
154 | protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException { | |
155 | StringBuilder builder = new StringBuilder(); | |
156 | Document chapterNode = loadDocument(chapUrl); | |
75002fcc NR |
157 | |
158 | Elements articles = chapterNode.getElementsByTag("article"); | |
159 | ||
160 | // sets and some pools are sorted in reverse order on the website | |
161 | if (getSource().getPath().startsWith("/posts")) { | |
162 | Collections.reverse(articles); | |
163 | } | |
164 | ||
165 | for (Element el : articles) { | |
8ac3d099 NR |
166 | builder.append("["); |
167 | builder.append(el.attr("data-file-url")); | |
168 | builder.append("]<br/>"); | |
169 | } | |
170 | ||
171 | return builder.toString(); | |
172 | } | |
173 | ||
174 | @Override | |
175 | protected URL getCanonicalUrl(URL source) { | |
8fbfa934 NR |
176 | // Convert search-pools into proper pools |
177 | if (source.getPath().equals("/posts") && source.getQuery() != null | |
178 | && source.getQuery().startsWith("tags=pool%3A")) { | |
179 | String poolNumber = source.getQuery() | |
180 | .substring("tags=pool%3A".length()); | |
181 | try { | |
182 | Integer.parseInt(poolNumber); | |
183 | String base = source.getProtocol() + "://" + source.getHost(); | |
184 | if (source.getPort() != -1) { | |
185 | base = base + ":" + source.getPort(); | |
186 | } | |
187 | source = new URL(base + "/posts/" + poolNumber); | |
188 | } catch (NumberFormatException e) { | |
189 | // Not a simple ppol, skip | |
190 | } catch (MalformedURLException e) { | |
191 | // Cannot happen | |
192 | } | |
193 | } | |
194 | ||
8ac3d099 NR |
195 | if (isSetOriginalUrl(source)) { |
196 | try { | |
d66deb8d | 197 | Document doc = DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", source.toString()); |
8ac3d099 NR |
198 | for (Element shortname : doc.getElementsByClass("set-shortname")) { |
199 | for (Element el : shortname.getElementsByTag("a")) { | |
200 | if (!el.attr("href").isEmpty()) | |
201 | return new URL(el.absUrl("href")); | |
08fe2e33 NR |
202 | } |
203 | } | |
8ac3d099 | 204 | } catch (IOException e) { |
d66deb8d | 205 | Instance.getInstance().getTraceHandler().error(e); |
08fe2e33 NR |
206 | } |
207 | } | |
208 | ||
c4b18c94 NR |
209 | if (isPool(source)) { |
210 | try { | |
211 | return new URL(source.toString().replace("/pool/show/", "/pools/")); | |
212 | } catch (MalformedURLException e) { | |
213 | } | |
214 | } | |
215 | ||
8ac3d099 NR |
216 | return super.getCanonicalUrl(source); |
217 | } | |
218 | ||
219 | // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query | |
220 | private String getTagsFromUrl(URL url) { | |
221 | String tags = url == null ? "" : url.getQuery(); | |
222 | int pos = tags.indexOf("tags="); | |
223 | ||
224 | if (pos >= 0) { | |
225 | tags = tags.substring(pos).substring("tags=".length()); | |
226 | } else { | |
227 | return ""; | |
08fe2e33 NR |
228 | } |
229 | ||
8ac3d099 NR |
230 | pos = tags.indexOf('&'); |
231 | if (pos > 0) { | |
232 | tags = tags.substring(0, pos); | |
233 | } | |
234 | pos = tags.indexOf('/'); | |
235 | if (pos > 0) { | |
236 | tags = tags.substring(0, pos); | |
237 | } | |
238 | ||
239 | return tags; | |
08fe2e33 NR |
240 | } |
241 | ||
8ac3d099 NR |
242 | private String getTitle() { |
243 | String title = ""; | |
244 | ||
245 | Element el = getSourceNode().getElementsByTag("title").first(); | |
246 | if (el != null) { | |
247 | title = el.text().trim(); | |
08fe2e33 NR |
248 | } |
249 | ||
8ac3d099 NR |
250 | for (String s : new String[] { "e621", "-", "e621" }) { |
251 | if (title.startsWith(s)) { | |
252 | title = title.substring(s.length()).trim(); | |
08fe2e33 | 253 | } |
8ac3d099 NR |
254 | if (title.endsWith(s)) { |
255 | title = title.substring(0, title.length() - s.length()).trim(); | |
256 | } | |
257 | ||
08fe2e33 NR |
258 | } |
259 | ||
8ac3d099 NR |
260 | if (isSearchOrSet(getSource())) { |
261 | title = title.isEmpty() ? "e621" : "[e621] " + title; | |
262 | } | |
263 | return title; | |
08fe2e33 | 264 | } |
b5e9855b | 265 | |
8ac3d099 NR |
266 | private String getAuthor() throws IOException { |
267 | StringBuilder builder = new StringBuilder(); | |
268 | ||
269 | if (isSearchOrSet(getSource())) { | |
270 | for (Element el : getSourceNode().getElementsByClass("search-tag")) { | |
271 | if (el.attr("itemprop").equals("author")) { | |
272 | if (builder.length() > 0) { | |
273 | builder.append(", "); | |
274 | } | |
275 | builder.append(el.text().trim()); | |
9948521d | 276 | } |
8ac3d099 NR |
277 | } |
278 | } | |
279 | ||
280 | if (isPool(getSource())) { | |
281 | String desc = getDesc(); | |
282 | String descL = desc.toLowerCase(); | |
283 | ||
284 | if (descL.startsWith("by:") || descL.startsWith("by ")) { | |
285 | desc = desc.substring(3).trim(); | |
286 | desc = desc.split("\n")[0]; | |
287 | ||
288 | String tab[] = desc.split(" "); | |
289 | for (int i = 0; i < Math.min(tab.length, 5); i++) { | |
290 | if (tab[i].startsWith("http")) | |
291 | break; | |
292 | builder.append(" ").append(tab[i]); | |
9948521d | 293 | } |
8ac3d099 NR |
294 | } |
295 | } | |
9948521d | 296 | |
8ac3d099 NR |
297 | return builder.toString(); |
298 | } | |
299 | ||
300 | // no tags for pools | |
301 | private List<String> getTags() { | |
302 | List<String> tags = new ArrayList<String>(); | |
303 | if (isSearchOrSet(getSource())) { | |
304 | String str = getTagsFromUrl(getSource()); | |
305 | for (String tag : str.split("\\+")) { | |
9b863b20 | 306 | try { |
8ac3d099 NR |
307 | tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim()); |
308 | } catch (UnsupportedEncodingException e) { | |
9b863b20 NR |
309 | } |
310 | } | |
311 | } | |
9948521d | 312 | |
8ac3d099 NR |
313 | return tags; |
314 | } | |
315 | ||
316 | private Image getCover() throws IOException { | |
317 | Image image = null; | |
318 | List<Entry<String, URL>> chapters = getChapters(null); | |
319 | if (!chapters.isEmpty()) { | |
12c180fc NR |
320 | URL chap1Url = chapters.get(0).getValue(); |
321 | String imgsChap1 = getChapterContent(chap1Url, 1, null); | |
322 | if (!imgsChap1.isEmpty()) { | |
323 | imgsChap1 = imgsChap1.split("]")[0].substring(1).trim(); | |
324 | image = bsImages.getImage(this, new URL(imgsChap1)); | |
325 | } | |
8ac3d099 NR |
326 | } |
327 | ||
328 | return image; | |
329 | } | |
330 | ||
331 | // note: will be removed at getCanonicalUrl() | |
332 | private boolean isSetOriginalUrl(URL originalUrl) { | |
333 | return originalUrl.getPath().startsWith("/post_sets/"); | |
9b863b20 NR |
334 | } |
335 | ||
b5e9855b | 336 | private boolean isPool(URL url) { |
c4b18c94 | 337 | return url.getPath().startsWith("/pools/") || url.getPath().startsWith("/pool/show/"); |
b5e9855b NR |
338 | } |
339 | ||
8ac3d099 NR |
340 | // set will be renamed into search by canonical url |
341 | private boolean isSearchOrSet(URL url) { | |
342 | return | |
343 | // search: | |
344 | (url.getPath().equals("/posts") && url.getQuery().contains("tags=")) | |
345 | // or set: | |
346 | || isSetOriginalUrl(url); | |
b5e9855b | 347 | } |
08fe2e33 | 348 | } |