1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.UnsupportedEncodingException
;
6 import java
.net
.MalformedURLException
;
8 import java
.net
.URLDecoder
;
9 import java
.util
.AbstractMap
;
10 import java
.util
.ArrayList
;
11 import java
.util
.Collections
;
12 import java
.util
.Date
;
13 import java
.util
.LinkedList
;
14 import java
.util
.List
;
15 import java
.util
.Map
.Entry
;
17 import org
.jsoup
.helper
.DataUtil
;
18 import org
.jsoup
.nodes
.Document
;
19 import org
.jsoup
.nodes
.Element
;
20 import org
.jsoup
.select
.Elements
;
22 import be
.nikiroo
.fanfix
.Instance
;
23 import be
.nikiroo
.fanfix
.data
.MetaData
;
24 import be
.nikiroo
.utils
.IOUtils
;
25 import be
.nikiroo
.utils
.Image
;
26 import be
.nikiroo
.utils
.Progress
;
27 import be
.nikiroo
.utils
.StringUtils
;
/**
 * Support class for <a href="http://e621.net/">e621.net</a> and
 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
 * including some of MLP.
 * <p>
 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
 * comics, but it can be difficult to browse.
 */
39 class E621
extends BasicSupport
{
41 protected boolean supports(URL url
) {
42 String host
= url
.getHost();
43 if (host
.startsWith("www.")) {
44 host
= host
.substring("www.".length());
47 return ("e621.net".equals(host
) || "e926.net".equals(host
)) && (isPool(url
) || isSearchOrSet(url
));
51 protected boolean isHtml() {
/**
 * Builds the metadata describing the current source.
 * <p>
 * The source name of the support type is used for both the source and the
 * publisher, and the source URL is used for both the URL and the UUID. The
 * subject is always "Furry", the story is flagged as an image document and
 * its cover is flagged as a fake cover.
 * <p>
 * NOTE(review): the end of this method is not visible in this excerpt --
 * presumably it returns the {@link MetaData} built here; to be confirmed.
 *
 * @throws IOException
 *             declared by this method; {@code getAuthor()} and
 *             {@code getCover()} called here declare it as well
 */
56 protected MetaData
getMeta() throws IOException
{
57 MetaData meta
= new MetaData();
59 meta
.setTitle(getTitle());
60 meta
.setAuthor(getAuthor());
62 meta
.setTags(getTags());
// the source name doubles as the publisher (see below)
63 meta
.setSource(getType().getSourceName());
// the source URL doubles as the UUID (see below)
64 meta
.setUrl(getSource().toString());
65 meta
.setPublisher(getType().getSourceName());
66 meta
.setUuid(getSource().toString());
69 meta
.setSubject("Furry");
70 meta
.setType(getType().toString());
71 meta
.setImageDocument(true);
// the cover comes from getCover() and is flagged as fake
72 meta
.setCover(getCover());
73 meta
.setFakeCover(true);
/**
 * Builds the description of the current source.
 * <p>
 * For a search or a set, the description is generated: it names the host
 * of the source, the current time (formatted by
 * {@code StringUtils.fromTime}) and the tags returned by {@code getTags()}.
 * <p>
 * For a pool, the element with the id "description" is looked up in the
 * source document.
 * <p>
 * NOTE(review): what is done with that element, and the value returned in
 * the remaining cases, are not visible in this excerpt.
 *
 * @throws IOException
 *             declared by this method
 */
79 protected String
getDesc() throws IOException
{
80 if (isSearchOrSet(getSource())) {
81 StringBuilder builder
= new StringBuilder();
82 builder
.append("A collection of images from ").append(getSource().getHost()).append("\n") //
83 .append("\tTime of creation: " + StringUtils
.fromTime(new Date().getTime())).append("\n") //
84 .append("\tTags: ");//
// each tag is appended after two tabulations, without any line break
85 for (String tag
: getTags()) {
86 builder
.append("\t\t").append(tag
);
89 return builder
.toString();
92 if (isPool(getSource())) {
// pools: the description is looked up by its element id
93 Element el
= getSourceNode().getElementById("description");
103 protected List
<Entry
<String
, URL
>> getChapters(Progress pg
) throws IOException
{
104 List
<Entry
<String
, URL
>> chapters
= new LinkedList
<Entry
<String
, URL
>>();
106 if (isPool(getSource())) {
107 String baseUrl
= "https://e621.net/" + getSource().getPath() + "?page=";
108 chapters
= getChapters(getSource(), pg
, baseUrl
, "");
109 } else if (isSearchOrSet(getSource())) {
110 String baseUrl
= "https://e621.net/posts/?page=";
111 String search
= "&tags=" + getTagsFromUrl(getSource());
113 chapters
= getChapters(getSource(), pg
,
117 // sets and some pools are sorted in reverse order on the website
118 if (getSource().getPath().startsWith("/posts")) {
119 Collections
.reverse(chapters
);
125 private List
<Entry
<String
, URL
>> getChapters(URL source
, Progress pg
, String baseUrl
, String parameters
)
127 List
<Entry
<String
, URL
>> urls
= new ArrayList
<Entry
<String
, URL
>>();
129 if (source
.getHost().contains("e926")) {
130 baseUrl
= baseUrl
.replace("e621", "e926");
133 for (int i
= 1; true; i
++) {
134 URL url
= new URL(baseUrl
+ i
+ parameters
);
136 InputStream pageI
= Instance
.getInstance().getCache().open(url
, this, false);
138 if (IOUtils
.readSmallStream(pageI
).contains("Nobody here but us chickens!")) {
141 urls
.add(new AbstractMap
.SimpleEntry
<String
, URL
>("Page " + Integer
.toString(i
), url
));
145 } catch (Exception e
) {
154 protected String
getChapterContent(URL chapUrl
, int number
, Progress pg
) throws IOException
{
155 StringBuilder builder
= new StringBuilder();
156 Document chapterNode
= loadDocument(chapUrl
);
158 Elements articles
= chapterNode
.getElementsByTag("article");
160 // sets and some pools are sorted in reverse order on the website
161 if (getSource().getPath().startsWith("/posts")) {
162 Collections
.reverse(articles
);
165 for (Element el
: articles
) {
167 builder
.append(el
.attr("data-file-url"));
168 builder
.append("]<br/>");
171 return builder
.toString();
175 protected URL
getCanonicalUrl(URL source
) {
176 if (isSetOriginalUrl(source
)) {
178 Document doc
= DataUtil
.load(Instance
.getInstance().getCache().open(source
, this, false), "UTF-8", source
.toString());
179 for (Element shortname
: doc
.getElementsByClass("set-shortname")) {
180 for (Element el
: shortname
.getElementsByTag("a")) {
181 if (!el
.attr("href").isEmpty())
182 return new URL(el
.absUrl("href"));
185 } catch (IOException e
) {
186 Instance
.getInstance().getTraceHandler().error(e
);
190 if (isPool(source
)) {
192 return new URL(source
.toString().replace("/pool/show/", "/pools/"));
193 } catch (MalformedURLException e
) {
197 return super.getCanonicalUrl(source
);
200 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
201 private String
getTagsFromUrl(URL url
) {
202 String tags
= url
== null ?
"" : url
.getQuery();
203 int pos
= tags
.indexOf("tags=");
206 tags
= tags
.substring(pos
).substring("tags=".length());
211 pos
= tags
.indexOf('&');
213 tags
= tags
.substring(0, pos
);
215 pos
= tags
.indexOf('/');
217 tags
= tags
.substring(0, pos
);
223 private String
getTitle() {
226 Element el
= getSourceNode().getElementsByTag("title").first();
228 title
= el
.text().trim();
231 for (String s
: new String
[] { "e621", "-", "e621" }) {
232 if (title
.startsWith(s
)) {
233 title
= title
.substring(s
.length()).trim();
235 if (title
.endsWith(s
)) {
236 title
= title
.substring(0, title
.length() - s
.length()).trim();
241 if (isSearchOrSet(getSource())) {
242 title
= title
.isEmpty() ?
"e621" : "[e621] " + title
;
/**
 * Computes the author(s) of the current source.
 * <p>
 * For a search or a set, the text of every element of class "search-tag"
 * whose "itemprop" attribute is "author" is collected, separated by ", ".
 * <p>
 * For a pool, the description returned by {@code getDesc()} is inspected:
 * when it starts with "by:" or "by " (case-insensitive), those 3
 * characters are removed, only the first line is kept, and at most the
 * first 5 space-separated words are examined.
 * <p>
 * NOTE(review): the statement guarded by the "http" test below is not
 * visible in this excerpt -- presumably it stops or skips at the first
 * link; to be confirmed.
 *
 * @throws IOException
 *             declared by this method; {@code getDesc()} called here
 *             declares it as well
 */
247 private String
getAuthor() throws IOException
{
248 StringBuilder builder
= new StringBuilder();
250 if (isSearchOrSet(getSource())) {
251 for (Element el
: getSourceNode().getElementsByClass("search-tag")) {
252 if (el
.attr("itemprop").equals("author")) {
// separator, only between two authors
253 if (builder
.length() > 0) {
254 builder
.append(", ");
256 builder
.append(el
.text().trim());
261 if (isPool(getSource())) {
262 String desc
= getDesc();
// lower-cased copy, only used for the prefix test below
263 String descL
= desc
.toLowerCase();
265 if (descL
.startsWith("by:") || descL
.startsWith("by ")) {
// both prefixes are 3 characters long
266 desc
= desc
.substring(3).trim();
267 desc
= desc
.split("\n")[0];
269 String tab
[] = desc
.split(" ");
// look at the first 5 words at most
270 for (int i
= 0; i
< Math
.min(tab
.length
, 5); i
++) {
271 if (tab
[i
].startsWith("http"))
273 builder
.append(" ").append(tab
[i
]);
278 return builder
.toString();
282 private List
<String
> getTags() {
283 List
<String
> tags
= new ArrayList
<String
>();
284 if (isSearchOrSet(getSource())) {
285 String str
= getTagsFromUrl(getSource());
286 for (String tag
: str
.split("\\+")) {
288 tags
.add(URLDecoder
.decode(tag
.trim(), "UTF-8").trim());
289 } catch (UnsupportedEncodingException e
) {
297 private Image
getCover() throws IOException
{
299 List
<Entry
<String
, URL
>> chapters
= getChapters(null);
300 if (!chapters
.isEmpty()) {
301 URL chap1Url
= chapters
.get(0).getValue();
302 String imgsChap1
= getChapterContent(chap1Url
, 1, null);
303 if (!imgsChap1
.isEmpty()) {
304 imgsChap1
= imgsChap1
.split("]")[0].substring(1).trim();
305 image
= bsImages
.getImage(this, new URL(imgsChap1
));
312 // note: will be removed at getCanonicalUrl()
313 private boolean isSetOriginalUrl(URL originalUrl
) {
314 return originalUrl
.getPath().startsWith("/post_sets/");
317 private boolean isPool(URL url
) {
318 return url
.getPath().startsWith("/pools/") || url
.getPath().startsWith("/pool/show/");
321 // set will be renamed into search by canonical url
322 private boolean isSearchOrSet(URL url
) {
325 (url
.getPath().equals("/posts") && url
.getQuery().contains("tags="))
327 || isSetOriginalUrl(url
);