update e621 (site changed + move to the non-deprecated BasicSupport)
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.UnsupportedEncodingException;
6 import java.net.URL;
7 import java.net.URLDecoder;
8 import java.util.AbstractMap;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.Date;
12 import java.util.LinkedList;
13 import java.util.List;
14 import java.util.Map.Entry;
15
16 import org.jsoup.helper.DataUtil;
17 import org.jsoup.nodes.Document;
18 import org.jsoup.nodes.Element;
19
20 import be.nikiroo.fanfix.Instance;
21 import be.nikiroo.fanfix.data.MetaData;
22 import be.nikiroo.utils.IOUtils;
23 import be.nikiroo.utils.Image;
24 import be.nikiroo.utils.Progress;
25 import be.nikiroo.utils.StringUtils;
26
27 /**
28 * Support class for <a href="http://e621.net/">e621.net</a> and
29 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
30 * including some of MLP.
31 * <p>
32 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
33 * comics, but it can be difficult to browse.
34 *
35 * @author niki
36 */
37 class E621 extends BasicSupport {
38 @Override
39 protected boolean supports(URL url) {
40 String host = url.getHost();
41 if (host.startsWith("www.")) {
42 host = host.substring("www.".length());
43 }
44
45 return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
46 }
47
48 @Override
49 protected boolean isHtml() {
50 return true;
51 }
52
53 @Override
54 protected MetaData getMeta() throws IOException {
55 MetaData meta = new MetaData();
56
57 meta.setTitle(getTitle());
58 meta.setAuthor(getAuthor());
59 meta.setDate("");
60 meta.setTags(getTags());
61 meta.setSource(getType().getSourceName());
62 meta.setUrl(getSource().toString());
63 meta.setPublisher(getType().getSourceName());
64 meta.setUuid(getSource().toString());
65 meta.setLuid("");
66 meta.setLang("en");
67 meta.setSubject("Furry");
68 meta.setType(getType().toString());
69 meta.setImageDocument(true);
70 meta.setCover(getCover());
71 meta.setFakeCover(true);
72
73 return meta;
74 }
75
76 @Override
77 protected String getDesc() throws IOException {
78 if (isSearchOrSet(getSource())) {
79 StringBuilder builder = new StringBuilder();
80 builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
81 .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
82 .append("\tTags: ");//
83 for (String tag : getTags()) {
84 builder.append("\t\t").append(tag);
85 }
86
87 return builder.toString();
88 }
89
90 if (isPool(getSource())) {
91 Element el = getSourceNode().getElementById("description");
92 if (el != null) {
93 return el.text();
94 }
95 }
96
97 return null;
98 }
99
100 @Override
101 protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
102 if (isPool(getSource())) {
103 String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
104 return getChapters(getSource(), pg, baseUrl, "");
105 } else if (isSearchOrSet(getSource())) {
106 String baseUrl = "https://e621.net/posts/?page=";
107 String search = "&tags=" + getTagsFromUrl(getSource());
108 return getChapters(getSource(), pg, baseUrl, search);
109 }
110
111 return new LinkedList<Entry<String, URL>>();
112 }
113
114 private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
115 throws IOException {
116 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
117
118 if (source.getHost().contains("e926")) {
119 baseUrl = baseUrl.replace("e621", "e926");
120 }
121
122 for (int i = 1; true; i++) {
123 URL url = new URL(baseUrl + i + parameters);
124 try {
125 InputStream pageI = Instance.getCache().open(url, this, false);
126 try {
127 if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
128 break;
129 }
130 urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
131 } finally {
132 pageI.close();
133 }
134 } catch (Exception e) {
135 break;
136 }
137 }
138
139 // They are sorted in reverse order on the website
140 Collections.reverse(urls);
141 return urls;
142 }
143
144 @Override
145 protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
146 StringBuilder builder = new StringBuilder();
147 Document chapterNode = loadDocument(chapUrl);
148 for (Element el : chapterNode.getElementsByTag("article")) {
149 builder.append("[");
150 builder.append(el.attr("data-file-url"));
151 builder.append("]<br/>");
152 }
153
154 return builder.toString();
155 }
156
157 @Override
158 protected URL getCanonicalUrl(URL source) {
159 if (isSetOriginalUrl(source)) {
160 try {
161 Document doc = DataUtil.load(Instance.getCache().open(source, this, false), "UTF-8", source.toString());
162 for (Element shortname : doc.getElementsByClass("set-shortname")) {
163 for (Element el : shortname.getElementsByTag("a")) {
164 if (!el.attr("href").isEmpty())
165 return new URL(el.absUrl("href"));
166 }
167 }
168 } catch (IOException e) {
169 Instance.getTraceHandler().error(e);
170 }
171 }
172
173 return super.getCanonicalUrl(source);
174 }
175
176 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
177 private String getTagsFromUrl(URL url) {
178 String tags = url == null ? "" : url.getQuery();
179 int pos = tags.indexOf("tags=");
180
181 if (pos >= 0) {
182 tags = tags.substring(pos).substring("tags=".length());
183 } else {
184 return "";
185 }
186
187 pos = tags.indexOf('&');
188 if (pos > 0) {
189 tags = tags.substring(0, pos);
190 }
191 pos = tags.indexOf('/');
192 if (pos > 0) {
193 tags = tags.substring(0, pos);
194 }
195
196 return tags;
197 }
198
199 private String getTitle() {
200 String title = "";
201
202 Element el = getSourceNode().getElementsByTag("title").first();
203 if (el != null) {
204 title = el.text().trim();
205 }
206
207 for (String s : new String[] { "e621", "-", "e621" }) {
208 if (title.startsWith(s)) {
209 title = title.substring(s.length()).trim();
210 }
211 if (title.endsWith(s)) {
212 title = title.substring(0, title.length() - s.length()).trim();
213 }
214
215 }
216
217 if (isSearchOrSet(getSource())) {
218 title = title.isEmpty() ? "e621" : "[e621] " + title;
219 }
220 return title;
221 }
222
223 private String getAuthor() throws IOException {
224 StringBuilder builder = new StringBuilder();
225
226 if (isSearchOrSet(getSource())) {
227 for (Element el : getSourceNode().getElementsByClass("search-tag")) {
228 if (el.attr("itemprop").equals("author")) {
229 if (builder.length() > 0) {
230 builder.append(", ");
231 }
232 builder.append(el.text().trim());
233 }
234 }
235 }
236
237 if (isPool(getSource())) {
238 String desc = getDesc();
239 String descL = desc.toLowerCase();
240
241 if (descL.startsWith("by:") || descL.startsWith("by ")) {
242 desc = desc.substring(3).trim();
243 desc = desc.split("\n")[0];
244
245 String tab[] = desc.split(" ");
246 for (int i = 0; i < Math.min(tab.length, 5); i++) {
247 if (tab[i].startsWith("http"))
248 break;
249 builder.append(" ").append(tab[i]);
250 }
251 }
252 }
253
254 return builder.toString();
255 }
256
257 // no tags for pools
258 private List<String> getTags() {
259 List<String> tags = new ArrayList<String>();
260 if (isSearchOrSet(getSource())) {
261 String str = getTagsFromUrl(getSource());
262 for (String tag : str.split("\\+")) {
263 try {
264 tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
265 } catch (UnsupportedEncodingException e) {
266 }
267 }
268 }
269
270 return tags;
271 }
272
273 private Image getCover() throws IOException {
274 Image image = null;
275 List<Entry<String, URL>> chapters = getChapters(null);
276 if (!chapters.isEmpty()) {
277 URL url = chapters.get(0).getValue();
278 image = bsImages.getImage(this, url);
279 }
280
281 return image;
282 }
283
284 // note: will be removed at getCanonicalUrl()
285 private boolean isSetOriginalUrl(URL originalUrl) {
286 return originalUrl.getPath().startsWith("/post_sets/");
287 }
288
289 private boolean isPool(URL url) {
290 return url.getPath().startsWith("/pools/");
291 }
292
293 // set will be renamed into search by canonical url
294 private boolean isSearchOrSet(URL url) {
295 return
296 // search:
297 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
298 // or set:
299 || isSetOriginalUrl(url);
300 }
301 }