Instance: use getInstance()
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.UnsupportedEncodingException;
6 import java.net.MalformedURLException;
7 import java.net.URL;
8 import java.net.URLDecoder;
9 import java.util.AbstractMap;
10 import java.util.ArrayList;
11 import java.util.Collections;
12 import java.util.Date;
13 import java.util.LinkedList;
14 import java.util.List;
15 import java.util.Map.Entry;
16
17 import org.jsoup.helper.DataUtil;
18 import org.jsoup.nodes.Document;
19 import org.jsoup.nodes.Element;
20
21 import be.nikiroo.fanfix.Instance;
22 import be.nikiroo.fanfix.data.MetaData;
23 import be.nikiroo.utils.IOUtils;
24 import be.nikiroo.utils.Image;
25 import be.nikiroo.utils.Progress;
26 import be.nikiroo.utils.StringUtils;
27
28 /**
29 * Support class for <a href="http://e621.net/">e621.net</a> and
30 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
31 * including some of MLP.
32 * <p>
33 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
34 * comics, but it can be difficult to browse.
35 *
36 * @author niki
37 */
38 class E621 extends BasicSupport {
39 @Override
40 protected boolean supports(URL url) {
41 String host = url.getHost();
42 if (host.startsWith("www.")) {
43 host = host.substring("www.".length());
44 }
45
46 return ("e621.net".equals(host) || "e926.net".equals(host)) && (isPool(url) || isSearchOrSet(url));
47 }
48
49 @Override
50 protected boolean isHtml() {
51 return true;
52 }
53
54 @Override
55 protected MetaData getMeta() throws IOException {
56 MetaData meta = new MetaData();
57
58 meta.setTitle(getTitle());
59 meta.setAuthor(getAuthor());
60 meta.setDate("");
61 meta.setTags(getTags());
62 meta.setSource(getType().getSourceName());
63 meta.setUrl(getSource().toString());
64 meta.setPublisher(getType().getSourceName());
65 meta.setUuid(getSource().toString());
66 meta.setLuid("");
67 meta.setLang("en");
68 meta.setSubject("Furry");
69 meta.setType(getType().toString());
70 meta.setImageDocument(true);
71 meta.setCover(getCover());
72 meta.setFakeCover(true);
73
74 return meta;
75 }
76
77 @Override
78 protected String getDesc() throws IOException {
79 if (isSearchOrSet(getSource())) {
80 StringBuilder builder = new StringBuilder();
81 builder.append("A collection of images from ").append(getSource().getHost()).append("\n") //
82 .append("\tTime of creation: " + StringUtils.fromTime(new Date().getTime())).append("\n") //
83 .append("\tTags: ");//
84 for (String tag : getTags()) {
85 builder.append("\t\t").append(tag);
86 }
87
88 return builder.toString();
89 }
90
91 if (isPool(getSource())) {
92 Element el = getSourceNode().getElementById("description");
93 if (el != null) {
94 return el.text();
95 }
96 }
97
98 return null;
99 }
100
101 @Override
102 protected List<Entry<String, URL>> getChapters(Progress pg) throws IOException {
103 if (isPool(getSource())) {
104 String baseUrl = "https://e621.net/" + getSource().getPath() + "?page=";
105 return getChapters(getSource(), pg, baseUrl, "");
106 } else if (isSearchOrSet(getSource())) {
107 String baseUrl = "https://e621.net/posts/?page=";
108 String search = "&tags=" + getTagsFromUrl(getSource());
109 return getChapters(getSource(), pg, baseUrl, search);
110 }
111
112 return new LinkedList<Entry<String, URL>>();
113 }
114
115 private List<Entry<String, URL>> getChapters(URL source, Progress pg, String baseUrl, String parameters)
116 throws IOException {
117 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
118
119 if (source.getHost().contains("e926")) {
120 baseUrl = baseUrl.replace("e621", "e926");
121 }
122
123 for (int i = 1; true; i++) {
124 URL url = new URL(baseUrl + i + parameters);
125 try {
126 InputStream pageI = Instance.getInstance().getCache().open(url, this, false);
127 try {
128 if (IOUtils.readSmallStream(pageI).contains("Nobody here but us chickens!")) {
129 break;
130 }
131 urls.add(new AbstractMap.SimpleEntry<String, URL>("Page " + Integer.toString(i), url));
132 } finally {
133 pageI.close();
134 }
135 } catch (Exception e) {
136 break;
137 }
138 }
139
140 // They are sorted in reverse order on the website
141 Collections.reverse(urls);
142 return urls;
143 }
144
145 @Override
146 protected String getChapterContent(URL chapUrl, int number, Progress pg) throws IOException {
147 StringBuilder builder = new StringBuilder();
148 Document chapterNode = loadDocument(chapUrl);
149 for (Element el : chapterNode.getElementsByTag("article")) {
150 builder.append("[");
151 builder.append(el.attr("data-file-url"));
152 builder.append("]<br/>");
153 }
154
155 return builder.toString();
156 }
157
158 @Override
159 protected URL getCanonicalUrl(URL source) {
160 if (isSetOriginalUrl(source)) {
161 try {
162 Document doc = DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", source.toString());
163 for (Element shortname : doc.getElementsByClass("set-shortname")) {
164 for (Element el : shortname.getElementsByTag("a")) {
165 if (!el.attr("href").isEmpty())
166 return new URL(el.absUrl("href"));
167 }
168 }
169 } catch (IOException e) {
170 Instance.getInstance().getTraceHandler().error(e);
171 }
172 }
173
174 if (isPool(source)) {
175 try {
176 return new URL(source.toString().replace("/pool/show/", "/pools/"));
177 } catch (MalformedURLException e) {
178 }
179 }
180
181 return super.getCanonicalUrl(source);
182 }
183
184 // returns "xxx+ddd+ggg" if "tags=xxx+ddd+ggg" was present in the query
185 private String getTagsFromUrl(URL url) {
186 String tags = url == null ? "" : url.getQuery();
187 int pos = tags.indexOf("tags=");
188
189 if (pos >= 0) {
190 tags = tags.substring(pos).substring("tags=".length());
191 } else {
192 return "";
193 }
194
195 pos = tags.indexOf('&');
196 if (pos > 0) {
197 tags = tags.substring(0, pos);
198 }
199 pos = tags.indexOf('/');
200 if (pos > 0) {
201 tags = tags.substring(0, pos);
202 }
203
204 return tags;
205 }
206
207 private String getTitle() {
208 String title = "";
209
210 Element el = getSourceNode().getElementsByTag("title").first();
211 if (el != null) {
212 title = el.text().trim();
213 }
214
215 for (String s : new String[] { "e621", "-", "e621" }) {
216 if (title.startsWith(s)) {
217 title = title.substring(s.length()).trim();
218 }
219 if (title.endsWith(s)) {
220 title = title.substring(0, title.length() - s.length()).trim();
221 }
222
223 }
224
225 if (isSearchOrSet(getSource())) {
226 title = title.isEmpty() ? "e621" : "[e621] " + title;
227 }
228 return title;
229 }
230
231 private String getAuthor() throws IOException {
232 StringBuilder builder = new StringBuilder();
233
234 if (isSearchOrSet(getSource())) {
235 for (Element el : getSourceNode().getElementsByClass("search-tag")) {
236 if (el.attr("itemprop").equals("author")) {
237 if (builder.length() > 0) {
238 builder.append(", ");
239 }
240 builder.append(el.text().trim());
241 }
242 }
243 }
244
245 if (isPool(getSource())) {
246 String desc = getDesc();
247 String descL = desc.toLowerCase();
248
249 if (descL.startsWith("by:") || descL.startsWith("by ")) {
250 desc = desc.substring(3).trim();
251 desc = desc.split("\n")[0];
252
253 String tab[] = desc.split(" ");
254 for (int i = 0; i < Math.min(tab.length, 5); i++) {
255 if (tab[i].startsWith("http"))
256 break;
257 builder.append(" ").append(tab[i]);
258 }
259 }
260 }
261
262 return builder.toString();
263 }
264
265 // no tags for pools
266 private List<String> getTags() {
267 List<String> tags = new ArrayList<String>();
268 if (isSearchOrSet(getSource())) {
269 String str = getTagsFromUrl(getSource());
270 for (String tag : str.split("\\+")) {
271 try {
272 tags.add(URLDecoder.decode(tag.trim(), "UTF-8").trim());
273 } catch (UnsupportedEncodingException e) {
274 }
275 }
276 }
277
278 return tags;
279 }
280
281 private Image getCover() throws IOException {
282 Image image = null;
283 List<Entry<String, URL>> chapters = getChapters(null);
284 if (!chapters.isEmpty()) {
285 URL chap1Url = chapters.get(0).getValue();
286 String imgsChap1 = getChapterContent(chap1Url, 1, null);
287 if (!imgsChap1.isEmpty()) {
288 imgsChap1 = imgsChap1.split("]")[0].substring(1).trim();
289 image = bsImages.getImage(this, new URL(imgsChap1));
290 }
291 }
292
293 return image;
294 }
295
296 // note: will be removed at getCanonicalUrl()
297 private boolean isSetOriginalUrl(URL originalUrl) {
298 return originalUrl.getPath().startsWith("/post_sets/");
299 }
300
301 private boolean isPool(URL url) {
302 return url.getPath().startsWith("/pools/") || url.getPath().startsWith("/pool/show/");
303 }
304
305 // set will be renamed into search by canonical url
306 private boolean isSearchOrSet(URL url) {
307 return
308 // search:
309 (url.getPath().equals("/posts") && url.getQuery().contains("tags="))
310 // or set:
311 || isSetOriginalUrl(url);
312 }
313 }