Merge branch 'subtree'
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
3import java.io.IOException;
4import java.io.InputStream;
b5e9855b 5import java.io.UnsupportedEncodingException;
c4b18c94 6import java.net.MalformedURLException;
08fe2e33 7import java.net.URL;
b5e9855b 8import java.net.URLDecoder;
ce297a79 9import java.util.AbstractMap;
08fe2e33 10import java.util.ArrayList;
9b863b20 11import java.util.Collections;
8ac3d099 12import java.util.Date;
b5e9855b 13import java.util.LinkedList;
08fe2e33
NR
14import java.util.List;
15import java.util.Map.Entry;
8ac3d099
NR
16
17import org.jsoup.helper.DataUtil;
18import org.jsoup.nodes.Document;
19import org.jsoup.nodes.Element;
75002fcc 20import org.jsoup.select.Elements;
08fe2e33
NR
21
22import be.nikiroo.fanfix.Instance;
68686a37 23import be.nikiroo.fanfix.data.MetaData;
8ac3d099 24import be.nikiroo.utils.IOUtils;
16a81ef7 25import be.nikiroo.utils.Image;
3b2b638f 26import be.nikiroo.utils.Progress;
08fe2e33
NR
27import be.nikiroo.utils.StringUtils;
28
29/**
8ac3d099
NR
30 * Support class for <a href="http://e621.net/">e621.net</a> and
31 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
08fe2e33
NR
32 * including some of MLP.
33 * <p>
34 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
35 * comics, but it can be difficult to browse.
36 *
37 * @author niki
38 */
8ac3d099 39class E621 extends BasicSupport {
08fe2e33
NR
40 @Override
41 protected boolean supports(URL url) {
42 String host = url.getHost();
43 if (host.startsWith("www.")) {
44 host = host.substring("www.".length());
45 }
46
8ac3d099 47 return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
08fe2e33
NR
48 }
49
50 @Override
51 protected boolean isHtml() {
52 return true;
53 }
54
8ac3d099
NR
55 @Override
56 protected MetaData getMeta() throws IOException {
57 MetaData meta = new MetaData();
b5e9855b 58
8ac3d099
NR
59 meta.setTitle(getTitle());
60 meta.setAuthor(getAuthor());
61 meta.setDate("");
62 meta.setTags(getTags());
63 meta.setSource(getType().getSourceName());
64 meta.setUrl(getSource().toString());
65 meta.setPublisher(getType().getSourceName());
66 meta.setUuid(getSource().toString());
67 meta.setLuid("");
68 meta.setLang("en");
69 meta.setSubject("Furry");
70 meta.setType(getType().toString());
71 meta.setImageDocument(true);
72 meta.setCover(getCover());
73 meta.setFakeCover(true);
595dfa7a 74
8ac3d099 75 return meta;
595dfa7a
NR
76 }
77
8ac3d099
NR
78 @Override
79 protected String getDesc() throws IOException {
80 if (isSearchOrSet(getSource())) {
b5e9855b 81 StringBuilder builder = new StringBuilder();
8ac3d099
NR
82 builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
83 .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
84 .append("\tTags: ");//
85 for (String tag : getTags()) {
86 builder.append("\t\t").append(tag);
b5e9855b
NR
87 }
88
89 return builder.toString();
90 }
91
8ac3d099
NR
92 if (isPool(getSource())) {
93 Element el = getSourceNode().getElementById("description");
94 if (el != null) {
95 return el.text();
08fe2e33
NR
96 }
97 }
98
99 return null;
100 }
101
08fe2e33 102 @Override
8ac3d099 103 protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
75002fcc
NR
104 List<Entry<String, URL>> chapters = new LinkedList<Entry<String, URL>>();
105
8ac3d099
NR
106 if (isPool(getSource())) {
107 String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
75002fcc 108 chapters = getChapters(getSource(), pg, baseUrl, "");
8ac3d099
NR
109 } else if (isSearchOrSet(getSource())) {
110 String baseUrl = "https://e621.net/posts/?page=";
111 String search = "&tags=" + getTagsFromUrl(getSource());
75002fcc
NR
112
113 chapters = getChapters(getSource(), pg,
a351d69d 114 baseUrl, search);
b5e9855b
NR
115 }
116
75002fcc
NR
117 // sets and some pools are sorted in reverse order on the website
118 if (getSource().getPath().startsWith("/posts")) {
119 Collections.reverse(chapters);
120 }
121
122 return chapters;
b5e9855b
NR
123 }
124
8ac3d099
NR
125 private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
126 throws IOException {
b5e9855b
NR
127 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
128
b5e9855b
NR
129 if (source.getHost().contains("e926")) {
130 baseUrl = baseUrl.replace("e621", "e926");
131 }
132
133 for (int i = 1; true; i++) {
8ac3d099 134 URL url = new URL(baseUrl + i + parameters);
b5e9855b 135 try {
d66deb8d 136 InputStream pageI = Instance.getInstance().getCache().open(url, this, false);
b5e9855b 137 try {
8ac3d099 138 if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
b5e9855b 139 break;
8ac3d099
NR
140 }
141 urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
b5e9855b
NR
142 } finally {
143 pageI.close();
144 }
145 } catch (Exception e) {
146 break;
147 }
148 }
149
150 return urls;
151 }
152
8ac3d099
NR
153 @Override
154 protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
155 StringBuilder builder = new StringBuilder();
156 Document chapterNode = loadDocument(chapUrl);
75002fcc
NR
157
158 Elements articles = chapterNode.getElementsByTag("article");
159
160 // sets and some pools are sorted in reverse order on the website
161 if (getSource().getPath().startsWith("/posts")) {
162 Collections.reverse(articles);
163 }
164
165 for (Element el : articles) {
8ac3d099
NR
166 builder.append("[");
167 builder.append(el.attr("data-file-url"));
168 builder.append("]<br/>");
169 }
170
171 return builder.toString();
172 }
173
174 @Override
175 protected URL getCanonicalUrl(URL source) {
176 if (isSetOriginalUrl(source)) {
177 try {
d66deb8d 178 Document doc = DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", source.toString());
8ac3d099
NR
179 for (Element shortname : doc.getElementsByClass("set-shortname")) {
180 for (Element el : shortname.getElementsByTag("a")) {
181 if (!el.attr("href").isEmpty())
182 return new URL(el.absUrl("href"));
08fe2e33
NR
183 }
184 }
8ac3d099 185 } catch (IOException e) {
d66deb8d 186 Instance.getInstance().getTraceHandler().error(e);
08fe2e33
NR
187 }
188 }
189
c4b18c94
NR
190 if (isPool(source)) {
191 try {
192 return new URL(source.toString().replace("/pool/show/", "/pools/"));
193 } catch (MalformedURLException e) {
194 }
195 }
196
8ac3d099
NR
197 return super.getCanonicalUrl(source);
198 }
199
200 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
201 private String getTagsFromUrl(URL url) {
202 String tags = url == null ? "" : url.getQuery();
203 int pos = tags.indexOf("tags=");
204
205 if (pos >= 0) {
206 tags = tags.substring(pos).substring("tags=".length());
207 } else {
208 return "";
08fe2e33
NR
209 }
210
8ac3d099
NR
211 pos = tags.indexOf('&');
212 if (pos > 0) {
213 tags = tags.substring(0, pos);
214 }
215 pos = tags.indexOf('/');
216 if (pos > 0) {
217 tags = tags.substring(0, pos);
218 }
219
220 return tags;
08fe2e33
NR
221 }
222
8ac3d099
NR
223 private String getTitle() {
224 String title = "";
225
226 Element el = getSourceNode().getElementsByTag("title").first();
227 if (el != null) {
228 title = el.text().trim();
08fe2e33
NR
229 }
230
8ac3d099
NR
231 for (String s : new String[] { "e621", "-", "e621" }) {
232 if (title.startsWith(s)) {
233 title = title.substring(s.length()).trim();
08fe2e33 234 }
8ac3d099
NR
235 if (title.endsWith(s)) {
236 title = title.substring(0, title.length() - s.length()).trim();
237 }
238
08fe2e33
NR
239 }
240
8ac3d099
NR
241 if (isSearchOrSet(getSource())) {
242 title = title.isEmpty() ? "e621" : "[e621] " + title;
243 }
244 return title;
08fe2e33 245 }
b5e9855b 246
8ac3d099
NR
247 private String getAuthor() throws IOException {
248 StringBuilder builder = new StringBuilder();
249
250 if (isSearchOrSet(getSource())) {
251 for (Element el : getSourceNode().getElementsByClass("search-tag")) {
252 if (el.attr("itemprop").equals("author")) {
253 if (builder.length() > 0) {
254 builder.append(", ");
255 }
256 builder.append(el.text().trim());
9948521d 257 }
8ac3d099
NR
258 }
259 }
260
261 if (isPool(getSource())) {
262 String desc = getDesc();
263 String descL = desc.toLowerCase();
264
265 if (descL.startsWith("by:") || descL.startsWith("by ")) {
266 desc = desc.substring(3).trim();
267 desc = desc.split("\n")[0];
268
269 String tab[] = desc.split(" ");
270 for (int i = 0; i < Math.min(tab.length, 5); i++) {
271 if (tab[i].startsWith("http"))
272 break;
273 builder.append(" ").append(tab[i]);
9948521d 274 }
8ac3d099
NR
275 }
276 }
9948521d 277
8ac3d099
NR
278 return builder.toString();
279 }
280
281 // no tags for pools
282 private List<String> getTags() {
283 List<String> tags = new ArrayList<String>();
284 if (isSearchOrSet(getSource())) {
285 String str = getTagsFromUrl(getSource());
286 for (String tag : str.split("\\+")) {
9b863b20 287 try {
8ac3d099
NR
288 tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
289 } catch (UnsupportedEncodingException e) {
9b863b20
NR
290 }
291 }
292 }
9948521d 293
8ac3d099
NR
294 return tags;
295 }
296
297 private Image getCover() throws IOException {
298 Image image = null;
299 List<Entry<String, URL>> chapters = getChapters(null);
300 if (!chapters.isEmpty()) {
12c180fc
NR
301 URL chap1Url = chapters.get(0).getValue();
302 String imgsChap1 = getChapterContent(chap1Url, 1, null);
303 if (!imgsChap1.isEmpty()) {
304 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
305 image = bsImages.getImage(this, new URL(imgsChap1));
306 }
8ac3d099
NR
307 }
308
309 return image;
310 }
311
312 // note: will be removed at getCanonicalUrl()
313 private boolean isSetOriginalUrl(URL originalUrl) {
314 return originalUrl.getPath().startsWith("/post_sets/");
9b863b20
NR
315 }
316
b5e9855b 317 private boolean isPool(URL url) {
c4b18c94 318 return url.getPath().startsWith("/pools/") || url.getPath().startsWith("/pool/show/");
b5e9855b
NR
319 }
320
8ac3d099
NR
321 // set will be renamed into search by canonical url
322 private boolean isSearchOrSet(URL url) {
323 return
324 // search:
325 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
326 // or set:
327 || isSetOriginalUrl(url);
b5e9855b 328 }
08fe2e33 329}