changelog
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
4import java.io.InputStream;
b5e9855b 5import java.io.UnsupportedEncodingException;
08fe2e33 6import java.net.URL;
b5e9855b 7import java.net.URLDecoder;
ce297a79 8import java.util.AbstractMap;
08fe2e33 9import java.util.ArrayList;
9b863b20 10import java.util.Collections;
8ac3d099 11import java.util.Date;
b5e9855b 12import java.util.LinkedList;
08fe2e33
NR
13import java.util.List;
14import java.util.Map.Entry;
8ac3d099
NR
15
16import org.jsoup.helper.DataUtil;
17import org.jsoup.nodes.Document;
18import org.jsoup.nodes.Element;
08fe2e33
NR
19
20import be.nikiroo.fanfix.Instance;
68686a37 21import be.nikiroo.fanfix.data.MetaData;
8ac3d099 22import be.nikiroo.utils.IOUtils;
16a81ef7 23import be.nikiroo.utils.Image;
3b2b638f 24import be.nikiroo.utils.Progress;
08fe2e33
NR
25import be.nikiroo.utils.StringUtils;
26
27/**
8ac3d099
NR
28 * Support class for <a href="http://e621.net/">e621.net</a> and
29 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
08fe2e33
NR
30 * including some of MLP.
31 * <p>
32 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
33 * comics, but it can be difficult to browse.
34 *
35 * @author niki
36 */
8ac3d099 37class E621 extends BasicSupport {
08fe2e33
NR
38 @Override
39 protected boolean supports(URL url) {
40 String host = url.getHost();
41 if (host.startsWith("www.")) {
42 host = host.substring("www.".length());
43 }
44
8ac3d099 45 return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
08fe2e33
NR
46 }
47
48 @Override
49 protected boolean isHtml() {
50 return true;
51 }
52
8ac3d099
NR
53 @Override
54 protected MetaData getMeta() throws IOException {
55 MetaData meta = new MetaData();
b5e9855b 56
8ac3d099
NR
57 meta.setTitle(getTitle());
58 meta.setAuthor(getAuthor());
59 meta.setDate("");
60 meta.setTags(getTags());
61 meta.setSource(getType().getSourceName());
62 meta.setUrl(getSource().toString());
63 meta.setPublisher(getType().getSourceName());
64 meta.setUuid(getSource().toString());
65 meta.setLuid("");
66 meta.setLang("en");
67 meta.setSubject("Furry");
68 meta.setType(getType().toString());
69 meta.setImageDocument(true);
70 meta.setCover(getCover());
71 meta.setFakeCover(true);
595dfa7a 72
8ac3d099 73 return meta;
595dfa7a
NR
74 }
75
8ac3d099
NR
76 @Override
77 protected String getDesc() throws IOException {
78 if (isSearchOrSet(getSource())) {
b5e9855b 79 StringBuilder builder = new StringBuilder();
8ac3d099
NR
80 builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
81 .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
82 .append("\tTags: ");//
83 for (String tag : getTags()) {
84 builder.append("\t\t").append(tag);
b5e9855b
NR
85 }
86
87 return builder.toString();
88 }
89
8ac3d099
NR
90 if (isPool(getSource())) {
91 Element el = getSourceNode().getElementById("description");
92 if (el != null) {
93 return el.text();
08fe2e33
NR
94 }
95 }
96
97 return null;
98 }
99
08fe2e33 100 @Override
8ac3d099
NR
101 protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
102 if (isPool(getSource())) {
103 String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
104 return getChapters(getSource(), pg, baseUrl, "");
105 } else if (isSearchOrSet(getSource())) {
106 String baseUrl = "https://e621.net/posts/?page=";
107 String search = "&tags=" + getTagsFromUrl(getSource());
108 return getChapters(getSource(), pg, baseUrl, search);
b5e9855b
NR
109 }
110
111 return new LinkedList<Entry<String, URL>>();
112 }
113
8ac3d099
NR
114 private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
115 throws IOException {
b5e9855b
NR
116 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
117
b5e9855b
NR
118 if (source.getHost().contains("e926")) {
119 baseUrl = baseUrl.replace("e621", "e926");
120 }
121
122 for (int i = 1; true; i++) {
8ac3d099 123 URL url = new URL(baseUrl + i + parameters);
b5e9855b
NR
124 try {
125 InputStream pageI = Instance.getCache().open(url, this, false);
126 try {
8ac3d099 127 if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
b5e9855b 128 break;
8ac3d099
NR
129 }
130 urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
b5e9855b
NR
131 } finally {
132 pageI.close();
133 }
134 } catch (Exception e) {
135 break;
136 }
137 }
138
9b863b20
NR
139 // They are sorted in reverse order on the website
140 Collections.reverse(urls);
b5e9855b
NR
141 return urls;
142 }
143
8ac3d099
NR
144 @Override
145 protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
146 StringBuilder builder = new StringBuilder();
147 Document chapterNode = loadDocument(chapUrl);
148 for (Element el : chapterNode.getElementsByTag("article")) {
149 builder.append("[");
150 builder.append(el.attr("data-file-url"));
151 builder.append("]<br/>");
152 }
153
154 return builder.toString();
155 }
156
157 @Override
158 protected URL getCanonicalUrl(URL source) {
159 if (isSetOriginalUrl(source)) {
160 try {
161 Document doc = DataUtil.load(Instance.getCache().open(source, this, false), "UTF-8", source.toString());
162 for (Element shortname : doc.getElementsByClass("set-shortname")) {
163 for (Element el : shortname.getElementsByTag("a")) {
164 if (!el.attr("href").isEmpty())
165 return new URL(el.absUrl("href"));
08fe2e33
NR
166 }
167 }
8ac3d099
NR
168 } catch (IOException e) {
169 Instance.getTraceHandler().error(e);
08fe2e33
NR
170 }
171 }
172
8ac3d099
NR
173 return super.getCanonicalUrl(source);
174 }
175
176 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
177 private String getTagsFromUrl(URL url) {
178 String tags = url == null ? "" : url.getQuery();
179 int pos = tags.indexOf("tags=");
180
181 if (pos >= 0) {
182 tags = tags.substring(pos).substring("tags=".length());
183 } else {
184 return "";
08fe2e33
NR
185 }
186
8ac3d099
NR
187 pos = tags.indexOf('&');
188 if (pos > 0) {
189 tags = tags.substring(0, pos);
190 }
191 pos = tags.indexOf('/');
192 if (pos > 0) {
193 tags = tags.substring(0, pos);
194 }
195
196 return tags;
08fe2e33
NR
197 }
198
8ac3d099
NR
199 private String getTitle() {
200 String title = "";
201
202 Element el = getSourceNode().getElementsByTag("title").first();
203 if (el != null) {
204 title = el.text().trim();
08fe2e33
NR
205 }
206
8ac3d099
NR
207 for (String s : new String[] { "e621", "-", "e621" }) {
208 if (title.startsWith(s)) {
209 title = title.substring(s.length()).trim();
08fe2e33 210 }
8ac3d099
NR
211 if (title.endsWith(s)) {
212 title = title.substring(0, title.length() - s.length()).trim();
213 }
214
08fe2e33
NR
215 }
216
8ac3d099
NR
217 if (isSearchOrSet(getSource())) {
218 title = title.isEmpty() ? "e621" : "[e621] " + title;
219 }
220 return title;
08fe2e33 221 }
b5e9855b 222
8ac3d099
NR
223 private String getAuthor() throws IOException {
224 StringBuilder builder = new StringBuilder();
225
226 if (isSearchOrSet(getSource())) {
227 for (Element el : getSourceNode().getElementsByClass("search-tag")) {
228 if (el.attr("itemprop").equals("author")) {
229 if (builder.length() > 0) {
230 builder.append(", ");
231 }
232 builder.append(el.text().trim());
9948521d 233 }
8ac3d099
NR
234 }
235 }
236
237 if (isPool(getSource())) {
238 String desc = getDesc();
239 String descL = desc.toLowerCase();
240
241 if (descL.startsWith("by:") || descL.startsWith("by ")) {
242 desc = desc.substring(3).trim();
243 desc = desc.split("\n")[0];
244
245 String tab[] = desc.split(" ");
246 for (int i = 0; i < Math.min(tab.length, 5); i++) {
247 if (tab[i].startsWith("http"))
248 break;
249 builder.append(" ").append(tab[i]);
9948521d 250 }
8ac3d099
NR
251 }
252 }
9948521d 253
8ac3d099
NR
254 return builder.toString();
255 }
256
257 // no tags for pools
258 private List<String> getTags() {
259 List<String> tags = new ArrayList<String>();
260 if (isSearchOrSet(getSource())) {
261 String str = getTagsFromUrl(getSource());
262 for (String tag : str.split("\\+")) {
9b863b20 263 try {
8ac3d099
NR
264 tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
265 } catch (UnsupportedEncodingException e) {
9b863b20
NR
266 }
267 }
268 }
9948521d 269
8ac3d099
NR
270 return tags;
271 }
272
273 private Image getCover() throws IOException {
274 Image image = null;
275 List<Entry<String, URL>> chapters = getChapters(null);
276 if (!chapters.isEmpty()) {
277 URL url = chapters.get(0).getValue();
278 image = bsImages.getImage(this, url);
279 }
280
281 return image;
282 }
283
284 // note: will be removed at getCanonicalUrl()
285 private boolean isSetOriginalUrl(URL originalUrl) {
286 return originalUrl.getPath().startsWith("/post_sets/");
9b863b20
NR
287 }
288
b5e9855b 289 private boolean isPool(URL url) {
8ac3d099 290 return url.getPath().startsWith("/pools/");
b5e9855b
NR
291 }
292
8ac3d099
NR
293 // set will be renamed into search by canonical url
294 private boolean isSearchOrSet(URL url) {
295 return
296 // search:
297 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
298 // or set:
299 || isSetOriginalUrl(url);
b5e9855b 300 }
08fe2e33 301}