476e88b77765e741235909c53f3d82166302399c
[fanfix.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.net.URL;
7 import java.util.ArrayList;
8 import java.util.List;
9 import java.util.Map.Entry;
10 import java.util.Scanner;
11
12 import be.nikiroo.fanfix.Instance;
13 import be.nikiroo.fanfix.data.Chapter;
14 import be.nikiroo.fanfix.data.MetaData;
15 import be.nikiroo.fanfix.data.Story;
16 import be.nikiroo.utils.StringUtils;
17
18 /**
19 * Support class for <a href="http://e621.net/">e621.net</a> and <a
20 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
21 * including some of MLP.
22 * <p>
23 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
24 * comics, but it can be difficult to browse.
25 *
26 * @author niki
27 */
28 class E621 extends BasicSupport {
29 @Override
30 public String getSourceName() {
31 return "e621.net";
32 }
33
34 @Override
35 protected MetaData getMeta(URL source, InputStream in) throws IOException {
36 MetaData meta = new MetaData();
37
38 meta.setTitle(getTitle(reset(in)));
39 meta.setAuthor(getAuthor(source, reset(in)));
40 meta.setDate("");
41 meta.setTags(new ArrayList<String>()); // TODDO ???
42 meta.setSource(getSourceName());
43 meta.setUrl(source.toString());
44 meta.setPublisher(getSourceName());
45 meta.setUuid(source.toString());
46 meta.setLuid("");
47 meta.setLang("EN");
48 meta.setSubject("");
49 meta.setType(getType().toString());
50 meta.setImageDocument(true);
51 meta.setCover(getCover(source));
52
53 return meta;
54 }
55
56 @Override
57 public Story process(URL url) throws IOException {
58 // There is no chapters on e621, just pagination...
59 Story story = super.process(url);
60
61 Chapter only = new Chapter(1, null);
62 for (Chapter chap : story) {
63 only.getParagraphs().addAll(chap.getParagraphs());
64 }
65
66 story.getChapters().clear();
67 story.getChapters().add(only);
68
69 return story;
70 }
71
72 @Override
73 protected boolean supports(URL url) {
74 String host = url.getHost();
75 if (host.startsWith("www.")) {
76 host = host.substring("www.".length());
77 }
78
79 return ("e621.net".equals(host) || "e926.net".equals(host))
80 && url.getPath().startsWith("/pool/");
81 }
82
83 @Override
84 protected boolean isHtml() {
85 return true;
86 }
87
88 private BufferedImage getCover(URL source) throws IOException {
89 InputStream in = Instance.getCache().open(source, this, true);
90 String images = getChapterContent(new URL(source.toString() + "?page="
91 + 1), in, 1);
92 if (!images.isEmpty()) {
93 int pos = images.indexOf('\n');
94 if (pos >= 0) {
95 images = images.substring(1, pos - 1);
96 return getImage(this, null, images);
97 }
98 }
99
100 return null;
101 }
102
103 private String getAuthor(URL source, InputStream in) throws IOException {
104 String author = getLine(in, "href=\"/post/show/", 0);
105 if (author != null) {
106 String key = "href=\"";
107 int pos = author.indexOf(key);
108 if (pos >= 0) {
109 author = author.substring(pos + key.length());
110 pos = author.indexOf("\"");
111 if (pos >= 0) {
112 author = author.substring(0, pos - 1);
113 String page = source.getProtocol() + "://"
114 + source.getHost() + author;
115 try {
116 InputStream pageIn = Instance.getCache().open(
117 new URL(page), this, false);
118 try {
119 key = "class=\"tag-type-artist\"";
120 author = getLine(pageIn, key, 0);
121 if (author != null) {
122 pos = author.indexOf("<a href=\"");
123 if (pos >= 0) {
124 author = author.substring(pos);
125 pos = author.indexOf("</a>");
126 if (pos >= 0) {
127 author = author.substring(0, pos);
128 return StringUtils.unhtml(author);
129 }
130 }
131 }
132 } finally {
133 pageIn.close();
134 }
135 } catch (Exception e) {
136 // No author found
137 }
138 }
139 }
140 }
141
142 return null;
143 }
144
145 private String getTitle(InputStream in) throws IOException {
146 String title = getLine(in, "<title>", 0);
147 if (title != null) {
148 int pos = title.indexOf('>');
149 if (pos >= 0) {
150 title = title.substring(pos + 1);
151 pos = title.indexOf('<');
152 if (pos >= 0) {
153 title = title.substring(0, pos);
154 }
155 }
156
157 if (title.startsWith("Pool:")) {
158 title = title.substring("Pool:".length());
159 }
160
161 title = StringUtils.unhtml(title).trim();
162 }
163
164 return title;
165 }
166
167 @Override
168 protected String getDesc(URL source, InputStream in) throws IOException {
169 String desc = getLine(in, "margin-bottom: 2em;", 0);
170
171 if (desc != null) {
172 StringBuilder builder = new StringBuilder();
173
174 boolean inTags = false;
175 for (char car : desc.toCharArray()) {
176 if ((inTags && car == '>') || (!inTags && car == '<')) {
177 inTags = !inTags;
178 }
179
180 if (inTags) {
181 builder.append(car);
182 }
183 }
184
185 return builder.toString().trim();
186 }
187
188 return null;
189 }
190
191 @Override
192 protected List<Entry<String, URL>> getChapters(URL source, InputStream in)
193 throws IOException {
194 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
195 int last = 1; // no pool/show when only one page
196
197 @SuppressWarnings("resource")
198 Scanner scan = new Scanner(in, "UTF-8");
199 scan.useDelimiter("\\n");
200 while (scan.hasNext()) {
201 String line = scan.next();
202 for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
203 .indexOf(source.getPath(), pos + source.getPath().length())) {
204 int equalPos = line.indexOf("=", pos);
205 int quotePos = line.indexOf("\"", pos);
206 if (equalPos >= 0 && quotePos > equalPos) {
207 String snum = line.substring(equalPos + 1, quotePos);
208 try {
209 int num = Integer.parseInt(snum);
210 if (num > last) {
211 last = num;
212 }
213 } catch (NumberFormatException e) {
214 }
215 }
216 }
217 }
218
219 for (int i = 1; i <= last; i++) {
220 final String key = Integer.toString(i);
221 final URL value = new URL(source.toString() + "?page=" + i);
222 urls.add(new Entry<String, URL>() {
223 public URL setValue(URL value) {
224 return null;
225 }
226
227 public URL getValue() {
228 return value;
229 }
230
231 public String getKey() {
232 return key;
233 }
234 });
235 }
236
237 return urls;
238 }
239
240 @Override
241 protected String getChapterContent(URL source, InputStream in, int number)
242 throws IOException {
243 StringBuilder builder = new StringBuilder();
244 String staticSite = "https://static1.e621.net";
245 if (source.getHost().contains("e926")) {
246 staticSite = staticSite.replace("e621", "e926");
247 }
248
249 String key = staticSite + "/data/preview/";
250
251 @SuppressWarnings("resource")
252 Scanner scan = new Scanner(in, "UTF-8");
253 scan.useDelimiter("\\n");
254 while (scan.hasNext()) {
255 String line = scan.next();
256 if (line.contains("class=\"preview\"")) {
257 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
258 key, pos + key.length())) {
259 int endPos = line.indexOf("\"", pos);
260 if (endPos >= 0) {
261 String id = line.substring(pos + key.length(), endPos);
262 id = staticSite + "/data/" + id;
263
264 int dotPos = id.lastIndexOf(".");
265 if (dotPos >= 0) {
266 id = id.substring(0, dotPos);
267 builder.append("[");
268 builder.append(id);
269 builder.append("]\n");
270 }
271 }
272 }
273 }
274 }
275
276 return builder.toString();
277 }
278 }