Add URL into .info and MetaData, work on Library
[fanfix.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.Map.Entry;
9 import java.util.Scanner;
10
11 import be.nikiroo.fanfix.Instance;
12 import be.nikiroo.fanfix.data.Chapter;
13 import be.nikiroo.fanfix.data.MetaData;
14 import be.nikiroo.fanfix.data.Story;
15 import be.nikiroo.utils.StringUtils;
16
17 /**
18 * Support class for <a href="http://e621.net/">e621.net</a> and <a
19 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
20 * including some of MLP.
21 * <p>
22 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
23 * comics, but it can be difficult to browse.
24 *
25 * @author niki
26 */
27 class E621 extends BasicSupport {
28 @Override
29 public String getSourceName() {
30 return "e621.net";
31 }
32
33 @Override
34 protected MetaData getMeta(URL source, InputStream in) throws IOException {
35 MetaData meta = new MetaData();
36
37 meta.setTitle(getTitle(reset(in)));
38 meta.setAuthor(getAuthor(source, reset(in)));
39 meta.setDate("");
40 meta.setTags(new ArrayList<String>()); // TODDO ???
41 meta.setSource(getSourceName());
42 meta.setUrl(source.toString());
43 meta.setPublisher(getSourceName());
44 meta.setUuid(source.toString());
45 meta.setLuid("");
46 meta.setLang("EN");
47 meta.setSubject("");
48 meta.setType(getType().toString());
49 meta.setImageDocument(true);
50 meta.setCover(null);
51
52 return meta;
53 }
54
55 @Override
56 public Story process(URL url) throws IOException {
57 // There is no chapters on e621, just pagination...
58 Story story = super.process(url);
59
60 Chapter only = new Chapter(1, null);
61 for (Chapter chap : story) {
62 only.getParagraphs().addAll(chap.getParagraphs());
63 }
64
65 story.getChapters().clear();
66 story.getChapters().add(only);
67
68 return story;
69 }
70
71 @Override
72 protected boolean supports(URL url) {
73 String host = url.getHost();
74 if (host.startsWith("www.")) {
75 host = host.substring("www.".length());
76 }
77
78 return ("e621.net".equals(host) || "e926.net".equals(host))
79 && url.getPath().startsWith("/pool/");
80 }
81
82 @Override
83 protected boolean isHtml() {
84 return true;
85 }
86
87 private String getAuthor(URL source, InputStream in) throws IOException {
88 String author = getLine(in, "href=\"/post/show/", 0);
89 if (author != null) {
90 String key = "href=\"";
91 int pos = author.indexOf(key);
92 if (pos >= 0) {
93 author = author.substring(pos + key.length());
94 pos = author.indexOf("\"");
95 if (pos >= 0) {
96 author = author.substring(0, pos - 1);
97 String page = source.getProtocol() + "://"
98 + source.getHost() + author;
99 InputStream pageIn = Instance.getCache().open(
100 new URL(page), this, false);
101 try {
102 key = "class=\"tag-type-artist\"";
103 author = getLine(pageIn, key, 0);
104 if (author != null) {
105 pos = author.indexOf("<a href=\"");
106 if (pos >= 0) {
107 author = author.substring(pos);
108 pos = author.indexOf("</a>");
109 if (pos >= 0) {
110 author = author.substring(0, pos);
111 return StringUtils.unhtml(author);
112 }
113 }
114 }
115 } finally {
116 pageIn.close();
117 }
118 }
119 }
120 }
121
122 return null;
123 }
124
125 private String getTitle(InputStream in) throws IOException {
126 String title = getLine(in, "<title>", 0);
127 if (title != null) {
128 int pos = title.indexOf('>');
129 if (pos >= 0) {
130 title = title.substring(pos + 1);
131 pos = title.indexOf('<');
132 if (pos >= 0) {
133 title = title.substring(0, pos);
134 }
135 }
136
137 if (title.startsWith("Pool:")) {
138 title = title.substring("Pool:".length());
139 }
140
141 title = StringUtils.unhtml(title).trim();
142 }
143
144 return title;
145 }
146
147 @Override
148 protected String getDesc(URL source, InputStream in) throws IOException {
149 String desc = getLine(in, "margin-bottom: 2em;", 0);
150
151 if (desc != null) {
152 StringBuilder builder = new StringBuilder();
153
154 boolean inTags = false;
155 for (char car : desc.toCharArray()) {
156 if ((inTags && car == '>') || (!inTags && car == '<')) {
157 inTags = !inTags;
158 }
159
160 if (inTags) {
161 builder.append(car);
162 }
163 }
164
165 return builder.toString().trim();
166 }
167
168 return null;
169 }
170
171 @Override
172 protected List<Entry<String, URL>> getChapters(URL source, InputStream in)
173 throws IOException {
174 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
175 int last = 1; // no pool/show when only one page
176
177 @SuppressWarnings("resource")
178 Scanner scan = new Scanner(in, "UTF-8");
179 scan.useDelimiter("\\n");
180 while (scan.hasNext()) {
181 String line = scan.next();
182 for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
183 .indexOf(source.getPath(), pos + source.getPath().length())) {
184 int equalPos = line.indexOf("=", pos);
185 int quotePos = line.indexOf("\"", pos);
186 if (equalPos >= 0 && quotePos > equalPos) {
187 String snum = line.substring(equalPos + 1, quotePos);
188 try {
189 int num = Integer.parseInt(snum);
190 if (num > last) {
191 last = num;
192 }
193 } catch (NumberFormatException e) {
194 }
195 }
196 }
197 }
198
199 for (int i = 1; i <= last; i++) {
200 final String key = Integer.toString(i);
201 final URL value = new URL(source.toString() + "?page=" + i);
202 urls.add(new Entry<String, URL>() {
203 public URL setValue(URL value) {
204 return null;
205 }
206
207 public URL getValue() {
208 return value;
209 }
210
211 public String getKey() {
212 return key;
213 }
214 });
215 }
216
217 return urls;
218 }
219
220 @Override
221 protected String getChapterContent(URL source, InputStream in, int number)
222 throws IOException {
223 StringBuilder builder = new StringBuilder();
224 String staticSite = "https://static1.e621.net";
225 if (source.getHost().contains("e926")) {
226 staticSite = staticSite.replace("e621", "e926");
227 }
228
229 String key = staticSite + "/data/preview/";
230
231 @SuppressWarnings("resource")
232 Scanner scan = new Scanner(in, "UTF-8");
233 scan.useDelimiter("\\n");
234 while (scan.hasNext()) {
235 String line = scan.next();
236 if (line.contains("class=\"preview\"")) {
237 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
238 key, pos + key.length())) {
239 int endPos = line.indexOf("\"", pos);
240 if (endPos >= 0) {
241 String id = line.substring(pos + key.length(), endPos);
242 id = staticSite + "/data/" + id;
243
244 int dotPos = id.lastIndexOf(".");
245 if (dotPos >= 0) {
246 id = id.substring(0, dotPos);
247 builder.append("[");
248 builder.append(id);
249 builder.append("]\n");
250 }
251 }
252 }
253 }
254 }
255
256 return builder.toString();
257 }
258 }