1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.UnsupportedEncodingException
;
7 import java
.net
.URLDecoder
;
8 import java
.util
.AbstractMap
;
9 import java
.util
.ArrayList
;
10 import java
.util
.Collections
;
11 import java
.util
.Date
;
12 import java
.util
.LinkedList
;
13 import java
.util
.List
;
14 import java
.util
.Map
.Entry
;
16 import org
.jsoup
.helper
.DataUtil
;
17 import org
.jsoup
.nodes
.Document
;
18 import org
.jsoup
.nodes
.Element
;
20 import be
.nikiroo
.fanfix
.Instance
;
21 import be
.nikiroo
.fanfix
.data
.MetaData
;
22 import be
.nikiroo
.utils
.IOUtils
;
23 import be
.nikiroo
.utils
.Image
;
24 import be
.nikiroo
.utils
.Progress
;
25 import be
.nikiroo
.utils
.StringUtils
;
28 * Support class for <a href="http://e621.net/">e621.net</a> and
29 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
30 * including some of MLP.
32 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
33 * comics, but it can be difficult to browse.
37 class E621
extends BasicSupport
{
39 protected boolean supports(URL url
) {
40 String host
= url
.getHost();
41 if (host
.startsWith("www.")) {
42 host
= host
.substring("www.".length());
45 return ("e621.net".equals(host
) || "e926.net".equals(host
)) && (isPool(url
) || isSearchOrSet(url
));
/**
 * NOTE(review): only the signature of this method is visible in this view
 * of the file; judging by its name it flags whether the chapter content is
 * HTML -- confirm the returned value against the full source.
 */
49 protected boolean isHtml() {
/**
 * Builds the {@link MetaData} describing the current source.
 * <p>
 * The values come from the private helpers of this class (title, author,
 * tags, cover) and from the source URL itself.
 * <p>
 * NOTE(review): some statements of this method (and its return statement)
 * are not visible in this view of the file; presumably the filled-in
 * object is returned -- confirm.
 *
 * @throws IOException
 *             declared by the signature; {@link #getAuthor()} and
 *             {@link #getCover()} can throw it
 */
54 protected MetaData
getMeta() throws IOException
{
55 MetaData meta
= new MetaData();
57 meta
.setTitle(getTitle());
58 meta
.setAuthor(getAuthor());
60 meta
.setTags(getTags());
// The source name is used for both the source and the publisher
61 meta
.setSource(getType().getSourceName());
// The source URL is used for both the URL and the UUID
62 meta
.setUrl(getSource().toString());
63 meta
.setPublisher(getType().getSourceName());
64 meta
.setUuid(getSource().toString());
67 meta
.setSubject("Furry");
68 meta
.setType(getType().toString());
// Stories from this support are flagged as image documents
69 meta
.setImageDocument(true);
// The cover comes from getCover() and is flagged as a fake cover
70 meta
.setCover(getCover());
71 meta
.setFakeCover(true);
/**
 * Builds the description of the current source.
 * <p>
 * For a search or a set, the description is generated: the host of the
 * source, the current time (formatted by {@link StringUtils}) and the list
 * of tags.
 * <p>
 * For a pool, the element with the id "description" is looked up in the
 * source document.
 * <p>
 * NOTE(review): the end of the pool branch and the final return are not
 * visible in this view of the file -- confirm what is returned when no
 * description is found (callers may need a null check).
 *
 * @throws IOException
 *             declared by the signature
 */
77 protected String
getDesc() throws IOException
{
78 if (isSearchOrSet(getSource())) {
79 StringBuilder builder
= new StringBuilder();
80 builder
.append("A collection of images from ").append(getSource().getHost()).append("\n") //
81 .append("\tTime of creation: " + StringUtils
.fromTime(new Date().getTime())).append("\n") //
82 .append("\tTags: ");//
// Each tag is prefixed by two tabs; no line break is added between tags here
83 for (String tag
: getTags()) {
84 builder
.append("\t\t").append(tag
);
87 return builder
.toString();
90 if (isPool(getSource())) {
// getElementById returns null when the page has no such element
91 Element el
= getSourceNode().getElementById("description");
101 protected List
<Entry
<String
, URL
>> getChapters(Progress pg
) throws IOException
{
102 if (isPool(getSource())) {
103 String baseUrl
= "https://e621.net/" + getSource().getPath() + "?page=";
104 return getChapters(getSource(), pg
, baseUrl
, "");
105 } else if (isSearchOrSet(getSource())) {
106 String baseUrl
= "https://e621.net/posts/?page=";
107 String search
= "&tags=" + getTagsFromUrl(getSource());
108 return getChapters(getSource(), pg
, baseUrl
, search
);
111 return new LinkedList
<Entry
<String
, URL
>>();
/**
 * Lists the result pages reachable from the given base URL.
 * <p>
 * Page URLs are built as <tt>baseUrl + pageNumber + parameters</tt>,
 * starting at page 1; each page is opened through the cache and its content
 * is checked for the "Nobody here but us chickens!" marker. Pages are
 * stored as "Page N" entries and the list is reversed at the end.
 * <p>
 * NOTE(review): the <tt>throws</tt> clause, the <tt>try</tt> openings, the
 * statements that leave the loop and the final return are not visible in
 * this view of the file; presumably the loop stops on the marker page or
 * on any exception, and the stream is closed -- confirm.
 *
 * @param source
 *            the source URL, only used to detect the e926 variant
 * @param pg
 *            the progress reporter (not used in the visible code)
 * @param baseUrl
 *            the URL prefix, up to and including the page parameter name
 * @param parameters
 *            the URL suffix to add after the page number
 */
114 private List
<Entry
<String
, URL
>> getChapters(URL source
, Progress pg
, String baseUrl
, String parameters
)
116 List
<Entry
<String
, URL
>> urls
= new ArrayList
<Entry
<String
, URL
>>();
// The base URL is written for e621: switch it to e926 when the source is on e926
118 if (source
.getHost().contains("e926")) {
119 baseUrl
= baseUrl
.replace("e621", "e926");
// No loop condition: leaving the loop is done from inside the body
122 for (int i
= 1; true; i
++) {
123 URL url
= new URL(baseUrl
+ i
+ parameters
);
125 InputStream pageI
= Instance
.getCache().open(url
, this, false);
// This marker text is checked in the page content to detect the end of the results
127 if (IOUtils
.readSmallStream(pageI
).contains("Nobody here but us chickens!")) {
130 urls
.add(new AbstractMap
.SimpleEntry
<String
, URL
>("Page " + Integer
.toString(i
), url
));
// Any exception is caught here (handler body not visible in this view)
134 } catch (Exception e
) {
139 // They are sorted in reverse order on the website
140 Collections
.reverse(urls
);
/**
 * Builds the content of one chapter (one page of results).
 * <p>
 * The page is loaded as a document and, for each <tt>article</tt> element,
 * the value of its <tt>data-file-url</tt> attribute is written, followed
 * by <tt>]&lt;br/&gt;</tt>.
 * <p>
 * NOTE(review): one statement of the loop is not visible in this view of
 * the file; presumably it writes the opening bracket that matches the
 * closing one -- confirm.
 *
 * @param chapUrl
 *            the URL of the page to load
 * @param number
 *            the chapter number (not used in the visible code)
 * @param pg
 *            the progress reporter (not used in the visible code)
 *
 * @return the generated content
 *
 * @throws IOException
 *             declared by the signature
 */
145 protected String
getChapterContent(URL chapUrl
, int number
, Progress pg
) throws IOException
{
146 StringBuilder builder
= new StringBuilder();
147 Document chapterNode
= loadDocument(chapUrl
);
148 for (Element el
: chapterNode
.getElementsByTag("article")) {
// attr() returns an empty string when the attribute is missing
150 builder
.append(el
.attr("data-file-url"));
151 builder
.append("]<br/>");
154 return builder
.toString();
158 protected URL
getCanonicalUrl(URL source
) {
159 if (isSetOriginalUrl(source
)) {
161 Document doc
= DataUtil
.load(Instance
.getCache().open(source
, this, false), "UTF-8", source
.toString());
162 for (Element shortname
: doc
.getElementsByClass("set-shortname")) {
163 for (Element el
: shortname
.getElementsByTag("a")) {
164 if (!el
.attr("href").isEmpty())
165 return new URL(el
.absUrl("href"));
168 } catch (IOException e
) {
169 Instance
.getTraceHandler().error(e
);
173 return super.getCanonicalUrl(source
);
176 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
/**
 * Extracts the value of the "tags=" parameter from the query of the given
 * URL.
 * <p>
 * The text that follows "tags=" is kept, then cut at the first '&amp;' and
 * at the first '/'.
 * <p>
 * NOTE(review): the conditions guarding each cut, the fallback when
 * "tags=" is absent and the return statement are not visible in this view
 * of the file -- confirm. Also note that {@link URL#getQuery()} returns
 * NULL for a URL without query, which the visible code does not check
 * before calling indexOf.
 *
 * @param url
 *            the URL to read the tags from (a NULL URL is treated as an
 *            empty query)
 */
177 private String
getTagsFromUrl(URL url
) {
178 String tags
= url
== null ?
"" : url
.getQuery();
179 int pos
= tags
.indexOf("tags=");
// Keep what follows "tags="
182 tags
= tags
.substring(pos
).substring("tags=".length());
// Drop the other query parameters, if any
187 pos
= tags
.indexOf('&');
189 tags
= tags
.substring(0, pos
);
// Drop anything after a slash
191 pos
= tags
.indexOf('/');
193 tags
= tags
.substring(0, pos
);
/**
 * Computes the title of the current source from the <tt>title</tt>
 * element of the source document.
 * <p>
 * The text is trimmed, then "e621", "-" and "e621" again are removed (in
 * that order) from its start and from its end. For a search or a set, the
 * result is prefixed by "[e621] ", or is "e621" alone when nothing is
 * left.
 * <p>
 * NOTE(review): the declaration of <tt>title</tt>, the guard around the
 * element access and the return statement are not visible in this view of
 * the file -- confirm.
 */
199 private String
getTitle() {
// first() returns null when the document has no title element
202 Element el
= getSourceNode().getElementsByTag("title").first();
204 title
= el
.text().trim();
// "e621" is listed twice so it is stripped both before and after the dash
207 for (String s
: new String
[] { "e621", "-", "e621" }) {
208 if (title
.startsWith(s
)) {
209 title
= title
.substring(s
.length()).trim();
211 if (title
.endsWith(s
)) {
212 title
= title
.substring(0, title
.length() - s
.length()).trim();
217 if (isSearchOrSet(getSource())) {
218 title
= title
.isEmpty() ?
"e621" : "[e621] " + title
;
/**
 * Computes the author(s) of the current source.
 * <p>
 * For a search or a set, the authors are the texts of the "search-tag"
 * elements whose <tt>itemprop</tt> attribute is "author", separated by
 * ", ".
 * <p>
 * For a pool, the author is guessed from the description: when it starts
 * with "by:" or "by " (case insensitive), at most the 5 first words of its
 * first line are considered, each added with a leading space.
 * <p>
 * NOTE(review): the statement executed when a word starts with "http" is
 * not visible in this view of the file (presumably it stops or skips the
 * word) -- confirm. Also confirm that {@link #getDesc()} cannot return
 * NULL for a pool, since the result is used without a check.
 *
 * @return the author(s), or an empty string if none was found
 *
 * @throws IOException
 *             declared by the signature; {@link #getDesc()} can throw it
 */
223 private String
getAuthor() throws IOException
{
224 StringBuilder builder
= new StringBuilder();
226 if (isSearchOrSet(getSource())) {
227 for (Element el
: getSourceNode().getElementsByClass("search-tag")) {
228 if (el
.attr("itemprop").equals("author")) {
// Separator between two authors
229 if (builder
.length() > 0) {
230 builder
.append(", ");
232 builder
.append(el
.text().trim());
237 if (isPool(getSource())) {
238 String desc
= getDesc();
239 String descL
= desc
.toLowerCase();
241 if (descL
.startsWith("by:") || descL
.startsWith("by ")) {
// Both prefixes are 3 characters long
242 desc
= desc
.substring(3).trim();
// Only the first line is considered
243 desc
= desc
.split("\n")[0];
245 String tab
[] = desc
.split(" ");
// At most 5 words are considered
246 for (int i
= 0; i
< Math
.min(tab
.length
, 5); i
++) {
247 if (tab
[i
].startsWith("http"))
249 builder
.append(" ").append(tab
[i
]);
254 return builder
.toString();
258 private List
<String
> getTags() {
259 List
<String
> tags
= new ArrayList
<String
>();
260 if (isSearchOrSet(getSource())) {
261 String str
= getTagsFromUrl(getSource());
262 for (String tag
: str
.split("\\+")) {
264 tags
.add(URLDecoder
.decode(tag
.trim(), "UTF-8").trim());
265 } catch (UnsupportedEncodingException e
) {
/**
 * Computes the cover image of the current source.
 * <p>
 * The chapters are listed (without progress reporting) and, when there is
 * at least one, the image is asked to <tt>bsImages</tt> for the URL of the
 * first chapter of the list.
 * <p>
 * NOTE(review): the declaration of <tt>image</tt> and the return statement
 * are not visible in this view of the file; presumably NULL is returned
 * when there is no chapter -- confirm.
 *
 * @throws IOException
 *             declared by the signature; listing the chapters can throw
 *             it
 */
273 private Image
getCover() throws IOException
{
// NULL is passed as the progress reporter
275 List
<Entry
<String
, URL
>> chapters
= getChapters(null);
276 if (!chapters
.isEmpty()) {
277 URL url
= chapters
.get(0).getValue();
278 image
= bsImages
.getImage(this, url
);
284 // note: will be removed at getCanonicalUrl()
285 private boolean isSetOriginalUrl(URL originalUrl
) {
286 return originalUrl
.getPath().startsWith("/post_sets/");
289 private boolean isPool(URL url
) {
290 return url
.getPath().startsWith("/pools/");
293 // set will be renamed into search by canonical url
294 private boolean isSearchOrSet(URL url
) {
297 (url
.getPath().equals("/posts") && url
.getQuery().contains("tags="))
299 || isSetOriginalUrl(url
);