hide some config options
[nikiroo-utils.git] / supported / E621.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
4import java.io.InputStream;
b5e9855b 5import java.io.UnsupportedEncodingException;
c4b18c94 6import java.net.MalformedURLException;
08fe2e33 7import java.net.URL;
b5e9855b 8import java.net.URLDecoder;
ce297a79 9import java.util.AbstractMap;
08fe2e33 10import java.util.ArrayList;
9b863b20 11import java.util.Collections;
8ac3d099 12import java.util.Date;
b5e9855b 13import java.util.LinkedList;
08fe2e33
NR
14import java.util.List;
15import java.util.Map.Entry;
8ac3d099
NR
16
17import org.jsoup.helper.DataUtil;
18import org.jsoup.nodes.Document;
19import org.jsoup.nodes.Element;
08fe2e33
NR
20
21import be.nikiroo.fanfix.Instance;
68686a37 22import be.nikiroo.fanfix.data.MetaData;
8ac3d099 23import be.nikiroo.utils.IOUtils;
16a81ef7 24import be.nikiroo.utils.Image;
3b2b638f 25import be.nikiroo.utils.Progress;
08fe2e33
NR
26import be.nikiroo.utils.StringUtils;
27
/**
 * Support class for <a href="http://e621.net/">e621.net</a> and
 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
 * including some of MLP.
 * <p>
 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
 * comics, but it can be difficult to browse.
 *
 * @author niki
 */
8ac3d099 38class E621 extends BasicSupport {
08fe2e33
NR
39 @Override
40 protected boolean supports(URL url) {
41 String host = url.getHost();
42 if (host.startsWith("www.")) {
43 host = host.substring("www.".length());
44 }
45
8ac3d099 46 return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
08fe2e33
NR
47 }
48
49 @Override
50 protected boolean isHtml() {
51 return true;
52 }
53
8ac3d099
NR
54 @Override
55 protected MetaData getMeta() throws IOException {
56 MetaData meta = new MetaData();
b5e9855b 57
8ac3d099
NR
58 meta.setTitle(getTitle());
59 meta.setAuthor(getAuthor());
60 meta.setDate("");
61 meta.setTags(getTags());
62 meta.setSource(getType().getSourceName());
63 meta.setUrl(getSource().toString());
64 meta.setPublisher(getType().getSourceName());
65 meta.setUuid(getSource().toString());
66 meta.setLuid("");
67 meta.setLang("en");
68 meta.setSubject("Furry");
69 meta.setType(getType().toString());
70 meta.setImageDocument(true);
71 meta.setCover(getCover());
72 meta.setFakeCover(true);
595dfa7a 73
8ac3d099 74 return meta;
595dfa7a
NR
75 }
76
8ac3d099
NR
77 @Override
78 protected String getDesc() throws IOException {
79 if (isSearchOrSet(getSource())) {
b5e9855b 80 StringBuilder builder = new StringBuilder();
8ac3d099
NR
81 builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
82 .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
83 .append("\tTags: ");//
84 for (String tag : getTags()) {
85 builder.append("\t\t").append(tag);
b5e9855b
NR
86 }
87
88 return builder.toString();
89 }
90
8ac3d099
NR
91 if (isPool(getSource())) {
92 Element el = getSourceNode().getElementById("description");
93 if (el != null) {
94 return el.text();
08fe2e33
NR
95 }
96 }
97
98 return null;
99 }
100
08fe2e33 101 @Override
8ac3d099
NR
102 protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
103 if (isPool(getSource())) {
104 String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
105 return getChapters(getSource(), pg, baseUrl, "");
106 } else if (isSearchOrSet(getSource())) {
107 String baseUrl = "https://e621.net/posts/?page=";
108 String search = "&tags=" + getTagsFromUrl(getSource());
a351d69d
NR
109 // sets are sorted in reverse order on the website
110 List<Entry<String, URL>> urls = getChapters(getSource(), pg,
111 baseUrl, search);
112 Collections.reverse(urls);
113 return urls;
b5e9855b
NR
114 }
115
116 return new LinkedList<Entry<String, URL>>();
117 }
118
8ac3d099
NR
119 private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
120 throws IOException {
b5e9855b
NR
121 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
122
b5e9855b
NR
123 if (source.getHost().contains("e926")) {
124 baseUrl = baseUrl.replace("e621", "e926");
125 }
126
127 for (int i = 1; true; i++) {
8ac3d099 128 URL url = new URL(baseUrl + i + parameters);
b5e9855b 129 try {
d66deb8d 130 InputStream pageI = Instance.getInstance().getCache().open(url, this, false);
b5e9855b 131 try {
8ac3d099 132 if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
b5e9855b 133 break;
8ac3d099
NR
134 }
135 urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
b5e9855b
NR
136 } finally {
137 pageI.close();
138 }
139 } catch (Exception e) {
140 break;
141 }
142 }
143
144 return urls;
145 }
146
8ac3d099
NR
147 @Override
148 protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
149 StringBuilder builder = new StringBuilder();
150 Document chapterNode = loadDocument(chapUrl);
151 for (Element el : chapterNode.getElementsByTag("article")) {
152 builder.append("[");
153 builder.append(el.attr("data-file-url"));
154 builder.append("]<br/>");
155 }
156
157 return builder.toString();
158 }
159
160 @Override
161 protected URL getCanonicalUrl(URL source) {
162 if (isSetOriginalUrl(source)) {
163 try {
d66deb8d 164 Document doc = DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", source.toString());
8ac3d099
NR
165 for (Element shortname : doc.getElementsByClass("set-shortname")) {
166 for (Element el : shortname.getElementsByTag("a")) {
167 if (!el.attr("href").isEmpty())
168 return new URL(el.absUrl("href"));
08fe2e33
NR
169 }
170 }
8ac3d099 171 } catch (IOException e) {
d66deb8d 172 Instance.getInstance().getTraceHandler().error(e);
08fe2e33
NR
173 }
174 }
175
c4b18c94
NR
176 if (isPool(source)) {
177 try {
178 return new URL(source.toString().replace("/pool/show/", "/pools/"));
179 } catch (MalformedURLException e) {
180 }
181 }
182
8ac3d099
NR
183 return super.getCanonicalUrl(source);
184 }
185
186 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
187 private String getTagsFromUrl(URL url) {
188 String tags = url == null ? "" : url.getQuery();
189 int pos = tags.indexOf("tags=");
190
191 if (pos >= 0) {
192 tags = tags.substring(pos).substring("tags=".length());
193 } else {
194 return "";
08fe2e33
NR
195 }
196
8ac3d099
NR
197 pos = tags.indexOf('&');
198 if (pos > 0) {
199 tags = tags.substring(0, pos);
200 }
201 pos = tags.indexOf('/');
202 if (pos > 0) {
203 tags = tags.substring(0, pos);
204 }
205
206 return tags;
08fe2e33
NR
207 }
208
8ac3d099
NR
209 private String getTitle() {
210 String title = "";
211
212 Element el = getSourceNode().getElementsByTag("title").first();
213 if (el != null) {
214 title = el.text().trim();
08fe2e33
NR
215 }
216
8ac3d099
NR
217 for (String s : new String[] { "e621", "-", "e621" }) {
218 if (title.startsWith(s)) {
219 title = title.substring(s.length()).trim();
08fe2e33 220 }
8ac3d099
NR
221 if (title.endsWith(s)) {
222 title = title.substring(0, title.length() - s.length()).trim();
223 }
224
08fe2e33
NR
225 }
226
8ac3d099
NR
227 if (isSearchOrSet(getSource())) {
228 title = title.isEmpty() ? "e621" : "[e621] " + title;
229 }
230 return title;
08fe2e33 231 }
b5e9855b 232
8ac3d099
NR
233 private String getAuthor() throws IOException {
234 StringBuilder builder = new StringBuilder();
235
236 if (isSearchOrSet(getSource())) {
237 for (Element el : getSourceNode().getElementsByClass("search-tag")) {
238 if (el.attr("itemprop").equals("author")) {
239 if (builder.length() > 0) {
240 builder.append(", ");
241 }
242 builder.append(el.text().trim());
9948521d 243 }
8ac3d099
NR
244 }
245 }
246
247 if (isPool(getSource())) {
248 String desc = getDesc();
249 String descL = desc.toLowerCase();
250
251 if (descL.startsWith("by:") || descL.startsWith("by ")) {
252 desc = desc.substring(3).trim();
253 desc = desc.split("\n")[0];
254
255 String tab[] = desc.split(" ");
256 for (int i = 0; i < Math.min(tab.length, 5); i++) {
257 if (tab[i].startsWith("http"))
258 break;
259 builder.append(" ").append(tab[i]);
9948521d 260 }
8ac3d099
NR
261 }
262 }
9948521d 263
8ac3d099
NR
264 return builder.toString();
265 }
266
267 // no tags for pools
268 private List<String> getTags() {
269 List<String> tags = new ArrayList<String>();
270 if (isSearchOrSet(getSource())) {
271 String str = getTagsFromUrl(getSource());
272 for (String tag : str.split("\\+")) {
9b863b20 273 try {
8ac3d099
NR
274 tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
275 } catch (UnsupportedEncodingException e) {
9b863b20
NR
276 }
277 }
278 }
9948521d 279
8ac3d099
NR
280 return tags;
281 }
282
283 private Image getCover() throws IOException {
284 Image image = null;
285 List<Entry<String, URL>> chapters = getChapters(null);
286 if (!chapters.isEmpty()) {
12c180fc
NR
287 URL chap1Url = chapters.get(0).getValue();
288 String imgsChap1 = getChapterContent(chap1Url, 1, null);
289 if (!imgsChap1.isEmpty()) {
290 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
291 image = bsImages.getImage(this, new URL(imgsChap1));
292 }
8ac3d099
NR
293 }
294
295 return image;
296 }
297
298 // note: will be removed at getCanonicalUrl()
299 private boolean isSetOriginalUrl(URL originalUrl) {
300 return originalUrl.getPath().startsWith("/post_sets/");
9b863b20
NR
301 }
302
b5e9855b 303 private boolean isPool(URL url) {
c4b18c94 304 return url.getPath().startsWith("/pools/") || url.getPath().startsWith("/pool/show/");
b5e9855b
NR
305 }
306
8ac3d099
NR
307 // set will be renamed into search by canonical url
308 private boolean isSearchOrSet(URL url) {
309 return
310 // search:
311 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
312 // or set:
313 || isSetOriginalUrl(url);
b5e9855b 314 }
08fe2e33 315}