1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.UnsupportedEncodingException
;
6 import java
.net
.MalformedURLException
;
8 import java
.net
.URLDecoder
;
9 import java
.util
.AbstractMap
;
10 import java
.util
.ArrayList
;
11 import java
.util
.Collections
;
12 import java
.util
.Date
;
13 import java
.util
.LinkedList
;
14 import java
.util
.List
;
15 import java
.util
.Map
.Entry
;
17 import org
.jsoup
.helper
.DataUtil
;
18 import org
.jsoup
.nodes
.Document
;
19 import org
.jsoup
.nodes
.Element
;
21 import be
.nikiroo
.fanfix
.Instance
;
22 import be
.nikiroo
.fanfix
.data
.MetaData
;
23 import be
.nikiroo
.utils
.IOUtils
;
24 import be
.nikiroo
.utils
.Image
;
25 import be
.nikiroo
.utils
.Progress
;
26 import be
.nikiroo
.utils
.StringUtils
;
/**
 * Support class for <a href="http://e621.net/">e621.net</a> and
 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
 * including some of MLP.
 * <p>
 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
 * comics, but it can be difficult to browse.
 */
38 class E621
extends BasicSupport
{
40 protected boolean supports(URL url
) {
41 String host
= url
.getHost();
42 if (host
.startsWith("www.")) {
43 host
= host
.substring("www.".length());
46 return ("e621.net".equals(host
) || "e926.net".equals(host
)) && (isPool(url
) || isSearchOrSet(url
));
50 protected boolean isHtml() {
55 protected MetaData
getMeta() throws IOException
{
56 MetaData meta
= new MetaData();
58 meta
.setTitle(getTitle());
59 meta
.setAuthor(getAuthor());
61 meta
.setTags(getTags());
62 meta
.setSource(getType().getSourceName());
63 meta
.setUrl(getSource().toString());
64 meta
.setPublisher(getType().getSourceName());
65 meta
.setUuid(getSource().toString());
68 meta
.setSubject("Furry");
69 meta
.setType(getType().toString());
70 meta
.setImageDocument(true);
71 meta
.setCover(getCover());
72 meta
.setFakeCover(true);
78 protected String
getDesc() throws IOException
{
79 if (isSearchOrSet(getSource())) {
80 StringBuilder builder
= new StringBuilder();
81 builder
.append("A collection of images from ").append(getSource().getHost()).append("\n") //
82 .append("\tTime of creation: " + StringUtils
.fromTime(new Date().getTime())).append("\n") //
83 .append("\tTags: ");//
84 for (String tag
: getTags()) {
85 builder
.append("\t\t").append(tag
);
88 return builder
.toString();
91 if (isPool(getSource())) {
92 Element el
= getSourceNode().getElementById("description");
102 protected List
<Entry
<String
, URL
>> getChapters(Progress pg
) throws IOException
{
103 if (isPool(getSource())) {
104 String baseUrl
= "https://e621.net/" + getSource().getPath() + "?page=";
105 return getChapters(getSource(), pg
, baseUrl
, "");
106 } else if (isSearchOrSet(getSource())) {
107 String baseUrl
= "https://e621.net/posts/?page=";
108 String search
= "&tags=" + getTagsFromUrl(getSource());
109 return getChapters(getSource(), pg
, baseUrl
, search
);
112 return new LinkedList
<Entry
<String
, URL
>>();
115 private List
<Entry
<String
, URL
>> getChapters(URL source
, Progress pg
, String baseUrl
, String parameters
)
117 List
<Entry
<String
, URL
>> urls
= new ArrayList
<Entry
<String
, URL
>>();
119 if (source
.getHost().contains("e926")) {
120 baseUrl
= baseUrl
.replace("e621", "e926");
123 for (int i
= 1; true; i
++) {
124 URL url
= new URL(baseUrl
+ i
+ parameters
);
126 InputStream pageI
= Instance
.getCache().open(url
, this, false);
128 if (IOUtils
.readSmallStream(pageI
).contains("Nobody here but us chickens!")) {
131 urls
.add(new AbstractMap
.SimpleEntry
<String
, URL
>("Page " + Integer
.toString(i
), url
));
135 } catch (Exception e
) {
140 // They are sorted in reverse order on the website
141 Collections
.reverse(urls
);
146 protected String
getChapterContent(URL chapUrl
, int number
, Progress pg
) throws IOException
{
147 StringBuilder builder
= new StringBuilder();
148 Document chapterNode
= loadDocument(chapUrl
);
149 for (Element el
: chapterNode
.getElementsByTag("article")) {
151 builder
.append(el
.attr("data-file-url"));
152 builder
.append("]<br/>");
155 return builder
.toString();
159 protected URL
getCanonicalUrl(URL source
) {
160 if (isSetOriginalUrl(source
)) {
162 Document doc
= DataUtil
.load(Instance
.getCache().open(source
, this, false), "UTF-8", source
.toString());
163 for (Element shortname
: doc
.getElementsByClass("set-shortname")) {
164 for (Element el
: shortname
.getElementsByTag("a")) {
165 if (!el
.attr("href").isEmpty())
166 return new URL(el
.absUrl("href"));
169 } catch (IOException e
) {
170 Instance
.getTraceHandler().error(e
);
174 if (isPool(source
)) {
176 return new URL(source
.toString().replace("/pool/show/", "/pools/"));
177 } catch (MalformedURLException e
) {
181 return super.getCanonicalUrl(source
);
184 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
185 private String
getTagsFromUrl(URL url
) {
186 String tags
= url
== null ?
"" : url
.getQuery();
187 int pos
= tags
.indexOf("tags=");
190 tags
= tags
.substring(pos
).substring("tags=".length());
195 pos
= tags
.indexOf('&');
197 tags
= tags
.substring(0, pos
);
199 pos
= tags
.indexOf('/');
201 tags
= tags
.substring(0, pos
);
207 private String
getTitle() {
210 Element el
= getSourceNode().getElementsByTag("title").first();
212 title
= el
.text().trim();
215 for (String s
: new String
[] { "e621", "-", "e621" }) {
216 if (title
.startsWith(s
)) {
217 title
= title
.substring(s
.length()).trim();
219 if (title
.endsWith(s
)) {
220 title
= title
.substring(0, title
.length() - s
.length()).trim();
225 if (isSearchOrSet(getSource())) {
226 title
= title
.isEmpty() ?
"e621" : "[e621] " + title
;
231 private String
getAuthor() throws IOException
{
232 StringBuilder builder
= new StringBuilder();
234 if (isSearchOrSet(getSource())) {
235 for (Element el
: getSourceNode().getElementsByClass("search-tag")) {
236 if (el
.attr("itemprop").equals("author")) {
237 if (builder
.length() > 0) {
238 builder
.append(", ");
240 builder
.append(el
.text().trim());
245 if (isPool(getSource())) {
246 String desc
= getDesc();
247 String descL
= desc
.toLowerCase();
249 if (descL
.startsWith("by:") || descL
.startsWith("by ")) {
250 desc
= desc
.substring(3).trim();
251 desc
= desc
.split("\n")[0];
253 String tab
[] = desc
.split(" ");
254 for (int i
= 0; i
< Math
.min(tab
.length
, 5); i
++) {
255 if (tab
[i
].startsWith("http"))
257 builder
.append(" ").append(tab
[i
]);
262 return builder
.toString();
266 private List
<String
> getTags() {
267 List
<String
> tags
= new ArrayList
<String
>();
268 if (isSearchOrSet(getSource())) {
269 String str
= getTagsFromUrl(getSource());
270 for (String tag
: str
.split("\\+")) {
272 tags
.add(URLDecoder
.decode(tag
.trim(), "UTF-8").trim());
273 } catch (UnsupportedEncodingException e
) {
281 private Image
getCover() throws IOException
{
283 List
<Entry
<String
, URL
>> chapters
= getChapters(null);
284 if (!chapters
.isEmpty()) {
285 URL chap1Url
= chapters
.get(0).getValue();
286 String imgsChap1
= getChapterContent(chap1Url
, 1, null);
287 if (!imgsChap1
.isEmpty()) {
288 imgsChap1
= imgsChap1
.split("]")[0].substring(1).trim();
289 image
= bsImages
.getImage(this, new URL(imgsChap1
));
296 // note: will be removed at getCanonicalUrl()
297 private boolean isSetOriginalUrl(URL originalUrl
) {
298 return originalUrl
.getPath().startsWith("/post_sets/");
301 private boolean isPool(URL url
) {
302 return url
.getPath().startsWith("/pools/") || url
.getPath().startsWith("/pool/show/");
305 // set will be renamed into search by canonical url
306 private boolean isSearchOrSet(URL url
) {
309 (url
.getPath().equals("/posts") && url
.getQuery().contains("tags="))
311 || isSetOriginalUrl(url
);