1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.UnsupportedEncodingException
;
6 import java
.net
.MalformedURLException
;
8 import java
.net
.URLDecoder
;
9 import java
.util
.AbstractMap
;
10 import java
.util
.ArrayList
;
11 import java
.util
.Collections
;
12 import java
.util
.Date
;
13 import java
.util
.LinkedList
;
14 import java
.util
.List
;
15 import java
.util
.Map
.Entry
;
17 import org
.jsoup
.helper
.DataUtil
;
18 import org
.jsoup
.nodes
.Document
;
19 import org
.jsoup
.nodes
.Element
;
20 import org
.jsoup
.select
.Elements
;
22 import be
.nikiroo
.fanfix
.Instance
;
23 import be
.nikiroo
.fanfix
.data
.MetaData
;
24 import be
.nikiroo
.utils
.IOUtils
;
25 import be
.nikiroo
.utils
.Image
;
26 import be
.nikiroo
.utils
.Progress
;
27 import be
.nikiroo
.utils
.StringUtils
;
30 * Support class for <a href="http://e621.net/">e621.net</a> and
31 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
32 * including some of MLP.
34 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
35 * comics, but it can be difficult to browse.
39 class E621
extends BasicSupport
{
41 protected boolean supports(URL url
) {
42 String host
= url
.getHost();
43 if (host
.startsWith("www.")) {
44 host
= host
.substring("www.".length());
47 return ("e621.net".equals(host
) || "e926.net".equals(host
)) && (isPool(url
) || isSearchOrSet(url
));
51 protected boolean isHtml() {
56 protected MetaData
getMeta() throws IOException
{
57 MetaData meta
= new MetaData();
59 meta
.setTitle(getTitle());
60 meta
.setAuthor(getAuthor());
62 meta
.setTags(getTags());
63 meta
.setSource(getType().getSourceName());
64 meta
.setUrl(getSource().toString());
65 meta
.setPublisher(getType().getSourceName());
66 meta
.setUuid(getSource().toString());
69 meta
.setSubject("Furry");
70 meta
.setType(getType().toString());
71 meta
.setImageDocument(true);
72 meta
.setCover(getCover());
73 meta
.setFakeCover(true);
79 protected String
getDesc() throws IOException
{
80 if (isSearchOrSet(getSource())) {
81 StringBuilder builder
= new StringBuilder();
82 builder
.append("A collection of images from ").append(getSource().getHost()).append("\n") //
83 .append("\tTime of creation: " + StringUtils
.fromTime(new Date().getTime())).append("\n") //
84 .append("\tTags: ");//
85 for (String tag
: getTags()) {
86 builder
.append("\t\t").append(tag
);
89 return builder
.toString();
92 if (isPool(getSource())) {
93 Element el
= getSourceNode().getElementById("description");
103 protected List
<Entry
<String
, URL
>> getChapters(Progress pg
) throws IOException
{
104 List
<Entry
<String
, URL
>> chapters
= new LinkedList
<Entry
<String
, URL
>>();
106 if (isPool(getSource())) {
107 String baseUrl
= "https://e621.net/" + getSource().getPath() + "?page=";
108 chapters
= getChapters(getSource(), pg
, baseUrl
, "");
109 } else if (isSearchOrSet(getSource())) {
110 String baseUrl
= "https://e621.net/posts/?page=";
111 String search
= "&tags=" + getTagsFromUrl(getSource());
113 chapters
= getChapters(getSource(), pg
,
117 // sets and some pools are sorted in reverse order on the website
118 if (getSource().getPath().startsWith("/posts")) {
119 Collections
.reverse(chapters
);
125 private List
<Entry
<String
, URL
>> getChapters(URL source
, Progress pg
, String baseUrl
, String parameters
)
127 List
<Entry
<String
, URL
>> urls
= new ArrayList
<Entry
<String
, URL
>>();
129 if (source
.getHost().contains("e926")) {
130 baseUrl
= baseUrl
.replace("e621", "e926");
133 for (int i
= 1; true; i
++) {
134 URL url
= new URL(baseUrl
+ i
+ parameters
);
136 InputStream pageI
= Instance
.getInstance().getCache().open(url
, this, false);
138 if (IOUtils
.readSmallStream(pageI
).contains("Nobody here but us chickens!")) {
141 urls
.add(new AbstractMap
.SimpleEntry
<String
, URL
>("Page " + Integer
.toString(i
), url
));
145 } catch (Exception e
) {
154 protected String
getChapterContent(URL chapUrl
, int number
, Progress pg
) throws IOException
{
155 StringBuilder builder
= new StringBuilder();
156 Document chapterNode
= loadDocument(chapUrl
);
158 Elements articles
= chapterNode
.getElementsByTag("article");
160 // sets and some pools are sorted in reverse order on the website
161 if (getSource().getPath().startsWith("/posts")) {
162 Collections
.reverse(articles
);
165 for (Element el
: articles
) {
167 builder
.append(el
.attr("data-file-url"));
168 builder
.append("]<br/>");
171 return builder
.toString();
175 protected URL
getCanonicalUrl(URL source
) {
176 // Convert search-pools into proper pools
177 if (source
.getPath().equals("/posts") && source
.getQuery() != null
178 && source
.getQuery().startsWith("tags=pool%3A")) {
179 String poolNumber
= source
.getQuery()
180 .substring("tags=pool%3A".length());
182 Integer
.parseInt(poolNumber
);
183 String base
= source
.getProtocol() + "://" + source
.getHost();
184 if (source
.getPort() != -1) {
185 base
= base
+ ":" + source
.getPort();
187 source
= new URL(base
+ "/pools/" + poolNumber
);
188 } catch (NumberFormatException e
) {
189 // Not a simple pool, skip
190 } catch (MalformedURLException e
) {
195 if (isSetOriginalUrl(source
)) {
197 Document doc
= DataUtil
.load(Instance
.getInstance().getCache().open(source
, this, false), "UTF-8", source
.toString());
198 for (Element shortname
: doc
.getElementsByClass("set-shortname")) {
199 for (Element el
: shortname
.getElementsByTag("a")) {
200 if (!el
.attr("href").isEmpty())
201 return new URL(el
.absUrl("href"));
204 } catch (IOException e
) {
205 Instance
.getInstance().getTraceHandler().error(e
);
209 if (isPool(source
)) {
211 return new URL(source
.toString().replace("/pool/show/", "/pools/"));
212 } catch (MalformedURLException e
) {
216 return super.getCanonicalUrl(source
);
219 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
220 private String
getTagsFromUrl(URL url
) {
221 String tags
= url
== null ?
"" : url
.getQuery();
222 int pos
= tags
.indexOf("tags=");
225 tags
= tags
.substring(pos
).substring("tags=".length());
230 pos
= tags
.indexOf('&');
232 tags
= tags
.substring(0, pos
);
234 pos
= tags
.indexOf('/');
236 tags
= tags
.substring(0, pos
);
242 private String
getTitle() {
245 Element el
= getSourceNode().getElementsByTag("title").first();
247 title
= el
.text().trim();
250 for (String s
: new String
[] { "e621", "-", "e621", "Pool", "-" }) {
251 if (title
.startsWith(s
)) {
252 title
= title
.substring(s
.length()).trim();
254 if (title
.endsWith(s
)) {
255 title
= title
.substring(0, title
.length() - s
.length()).trim();
259 if (isSearchOrSet(getSource())) {
260 title
= title
.isEmpty() ?
"e621" : "[e621] " + title
;
266 private String
getAuthor() throws IOException
{
267 StringBuilder builder
= new StringBuilder();
269 if (isSearchOrSet(getSource())) {
270 for (Element el
: getSourceNode().getElementsByClass("search-tag")) {
271 if (el
.attr("itemprop").equals("author")) {
272 if (builder
.length() > 0) {
273 builder
.append(", ");
275 builder
.append(el
.text().trim());
280 if (isPool(getSource())) {
281 String desc
= getDesc();
282 String descL
= desc
.toLowerCase();
284 if (descL
.startsWith("by:") || descL
.startsWith("by ")) {
285 desc
= desc
.substring(3).trim();
286 desc
= desc
.split("\n")[0];
288 String tab
[] = desc
.split(" ");
289 for (int i
= 0; i
< Math
.min(tab
.length
, 5); i
++) {
290 if (tab
[i
].startsWith("http"))
292 builder
.append(" ").append(tab
[i
]);
296 if (builder
.length() == 0) {
297 String url
= "https://e621.net/" + getSource().getPath()
299 Document page1
= DataUtil
.load(Instance
.getInstance().getCache()
300 .open(getSource(), this, false), "UTF-8",
302 for (Element el
: page1
.getElementsByClass("search-tag")) {
303 if (el
.attr("itemprop").equals("author")) {
304 if (builder
.length() > 0) {
305 builder
.append(", ");
307 builder
.append(el
.text().trim());
313 return builder
.toString();
317 private List
<String
> getTags() {
318 List
<String
> tags
= new ArrayList
<String
>();
319 if (isSearchOrSet(getSource())) {
320 String str
= getTagsFromUrl(getSource());
321 for (String tag
: str
.split("\\+")) {
323 tags
.add(URLDecoder
.decode(tag
.trim(), "UTF-8").trim());
324 } catch (UnsupportedEncodingException e
) {
332 private Image
getCover() throws IOException
{
334 List
<Entry
<String
, URL
>> chapters
= getChapters(null);
335 if (!chapters
.isEmpty()) {
336 URL chap1Url
= chapters
.get(0).getValue();
337 String imgsChap1
= getChapterContent(chap1Url
, 1, null);
338 if (!imgsChap1
.isEmpty()) {
339 imgsChap1
= imgsChap1
.split("]")[0].substring(1).trim();
340 image
= bsImages
.getImage(this, new URL(imgsChap1
));
347 // note: will be removed at getCanonicalUrl()
348 private boolean isSetOriginalUrl(URL originalUrl
) {
349 return originalUrl
.getPath().startsWith("/post_sets/");
352 private boolean isPool(URL url
) {
353 return url
.getPath().startsWith("/pools/") || url
.getPath().startsWith("/pool/show/");
356 // set will be renamed into search by canonical url
357 private boolean isSearchOrSet(URL url
) {
360 (url
.getPath().equals("/posts") && url
.getQuery().contains("tags="))
362 || isSetOriginalUrl(url
);