Better URL entries + fix for FimFicAPI:
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.AbstractMap;
7 import java.util.ArrayList;
8 import java.util.List;
9 import java.util.Map.Entry;
10 import java.util.Scanner;
11
12 import be.nikiroo.fanfix.Instance;
13 import be.nikiroo.fanfix.data.Chapter;
14 import be.nikiroo.fanfix.data.MetaData;
15 import be.nikiroo.fanfix.data.Story;
16 import be.nikiroo.utils.Image;
17 import be.nikiroo.utils.Progress;
18 import be.nikiroo.utils.StringUtils;
19
20 /**
21 * Support class for <a href="http://e621.net/">e621.net</a> and <a
22 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
23 * including some of MLP.
24 * <p>
25 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
26 * comics, but it can be difficult to browse.
27 *
28 * @author niki
29 */
30 class E621 extends BasicSupport_Deprecated {
31 @Override
32 public String getSourceName() {
33 return "e621.net";
34 }
35
36 @Override
37 protected MetaData getMeta(URL source, InputStream in) throws IOException {
38 MetaData meta = new MetaData();
39
40 meta.setTitle(getTitle(reset(in)));
41 meta.setAuthor(getAuthor(source, reset(in)));
42 meta.setDate("");
43 meta.setTags(new ArrayList<String>()); // TODDO ???
44 meta.setSource(getSourceName());
45 meta.setUrl(source.toString());
46 meta.setPublisher(getSourceName());
47 meta.setUuid(source.toString());
48 meta.setLuid("");
49 meta.setLang("en");
50 meta.setSubject("Furry");
51 meta.setType(getType().toString());
52 meta.setImageDocument(true);
53 meta.setCover(getCover(source));
54 meta.setFakeCover(true);
55
56 return meta;
57 }
58
59 @Override
60 public Story process(URL url, Progress pg) throws IOException {
61 // There is no chapters on e621, just pagination...
62 Story story = super.process(url, pg);
63
64 Chapter only = new Chapter(1, null);
65 for (Chapter chap : story) {
66 only.getParagraphs().addAll(chap.getParagraphs());
67 }
68
69 story.getChapters().clear();
70 story.getChapters().add(only);
71
72 return story;
73 }
74
75 @Override
76 protected boolean supports(URL url) {
77 String host = url.getHost();
78 if (host.startsWith("www.")) {
79 host = host.substring("www.".length());
80 }
81
82 return ("e621.net".equals(host) || "e926.net".equals(host))
83 && url.getPath().startsWith("/pool/");
84 }
85
86 @Override
87 protected boolean isHtml() {
88 return true;
89 }
90
91 private Image getCover(URL source) throws IOException {
92 InputStream in = Instance.getCache().open(source, this, true);
93 String images = getChapterContent(new URL(source.toString() + "?page="
94 + 1), in, 1, null);
95 if (!images.isEmpty()) {
96 int pos = images.indexOf("<br/>");
97 if (pos >= 0) {
98 images = images.substring(1, pos - 1);
99 return getImage(this, null, images);
100 }
101 }
102
103 return null;
104 }
105
106 private String getAuthor(URL source, InputStream in) {
107 String author = getLine(in, "href=\"/post/show/", 0);
108 if (author != null) {
109 String key = "href=\"";
110 int pos = author.indexOf(key);
111 if (pos >= 0) {
112 author = author.substring(pos + key.length());
113 pos = author.indexOf("\"");
114 if (pos >= 0) {
115 author = author.substring(0, pos - 1);
116 String page = source.getProtocol() + "://"
117 + source.getHost() + author;
118 try {
119 InputStream pageIn = Instance.getCache().open(
120 new URL(page), this, false);
121 try {
122 key = "class=\"tag-type-artist\"";
123 author = getLine(pageIn, key, 0);
124 if (author != null) {
125 pos = author.indexOf("<a href=\"");
126 if (pos >= 0) {
127 author = author.substring(pos);
128 pos = author.indexOf("</a>");
129 if (pos >= 0) {
130 author = author.substring(0, pos);
131 return StringUtils.unhtml(author);
132 }
133 }
134 }
135 } finally {
136 pageIn.close();
137 }
138 } catch (Exception e) {
139 // No author found
140 }
141 }
142 }
143 }
144
145 return null;
146 }
147
148 private String getTitle(InputStream in) {
149 String title = getLine(in, "<title>", 0);
150 if (title != null) {
151 int pos = title.indexOf('>');
152 if (pos >= 0) {
153 title = title.substring(pos + 1);
154 pos = title.indexOf('<');
155 if (pos >= 0) {
156 title = title.substring(0, pos);
157 }
158 }
159
160 if (title.startsWith("Pool:")) {
161 title = title.substring("Pool:".length());
162 }
163
164 title = StringUtils.unhtml(title).trim();
165 }
166
167 return title;
168 }
169
170 @Override
171 protected String getDesc(URL source, InputStream in) throws IOException {
172 String desc = getLine(in, "margin-bottom: 2em;", 0);
173
174 if (desc != null) {
175 StringBuilder builder = new StringBuilder();
176
177 boolean inTags = false;
178 for (char car : desc.toCharArray()) {
179 if ((inTags && car == '>') || (!inTags && car == '<')) {
180 inTags = !inTags;
181 }
182
183 if (inTags) {
184 builder.append(car);
185 }
186 }
187
188 return builder.toString().trim();
189 }
190
191 return null;
192 }
193
194 @Override
195 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
196 Progress pg) throws IOException {
197 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
198 int last = 1; // no pool/show when only one page
199
200 @SuppressWarnings("resource")
201 Scanner scan = new Scanner(in, "UTF-8");
202 scan.useDelimiter("\\n");
203 while (scan.hasNext()) {
204 String line = scan.next();
205 for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
206 .indexOf(source.getPath(), pos + source.getPath().length())) {
207 int equalPos = line.indexOf("=", pos);
208 int quotePos = line.indexOf("\"", pos);
209 if (equalPos >= 0 && quotePos > equalPos) {
210 String snum = line.substring(equalPos + 1, quotePos);
211 try {
212 int num = Integer.parseInt(snum);
213 if (num > last) {
214 last = num;
215 }
216 } catch (NumberFormatException e) {
217 }
218 }
219 }
220 }
221
222 for (int i = 1; i <= last; i++) {
223 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
224 .toString(i), new URL(source.toString() + "?page=" + i)));
225 }
226
227 return urls;
228 }
229
230 @Override
231 protected String getChapterContent(URL source, InputStream in, int number,
232 Progress pg) throws IOException {
233 StringBuilder builder = new StringBuilder();
234 String staticSite = "https://static1.e621.net";
235 if (source.getHost().contains("e926")) {
236 staticSite = staticSite.replace("e621", "e926");
237 }
238
239 String key = staticSite + "/data/preview/";
240
241 @SuppressWarnings("resource")
242 Scanner scan = new Scanner(in, "UTF-8");
243 scan.useDelimiter("\\n");
244 while (scan.hasNext()) {
245 String line = scan.next();
246 if (line.contains("class=\"preview")) {
247 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
248 key, pos + key.length())) {
249 int endPos = line.indexOf("\"", pos);
250 if (endPos >= 0) {
251 String id = line.substring(pos + key.length(), endPos);
252 id = staticSite + "/data/" + id;
253
254 int dotPos = id.lastIndexOf(".");
255 if (dotPos >= 0) {
256 id = id.substring(0, dotPos);
257 builder.append("[");
258 builder.append(id);
259 builder.append("]<br/>");
260 }
261 }
262 }
263 }
264 }
265
266 return builder.toString();
267 }
268 }