Fix e621 getAuthor() which could error out
[fanfix.git] / src / be / nikiroo / fanfix / supported / E621.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.Map.Entry;
9 import java.util.Scanner;
10
11 import be.nikiroo.fanfix.Instance;
12 import be.nikiroo.fanfix.data.Chapter;
13 import be.nikiroo.fanfix.data.MetaData;
14 import be.nikiroo.fanfix.data.Story;
15 import be.nikiroo.utils.StringUtils;
16
17 /**
18 * Support class for <a href="http://e621.net/">e621.net</a> and <a
19 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
20 * including some of MLP.
21 * <p>
22 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
23 * comics, but it can be difficult to browse.
24 *
25 * @author niki
26 */
27 class E621 extends BasicSupport {
28 @Override
29 public String getSourceName() {
30 return "e621.net";
31 }
32
33 @Override
34 protected MetaData getMeta(URL source, InputStream in) throws IOException {
35 MetaData meta = new MetaData();
36
37 meta.setTitle(getTitle(reset(in)));
38 meta.setAuthor(getAuthor(source, reset(in)));
39 meta.setDate("");
40 meta.setTags(new ArrayList<String>()); // TODDO ???
41 meta.setSource(getSourceName());
42 meta.setUrl(source.toString());
43 meta.setPublisher(getSourceName());
44 meta.setUuid(source.toString());
45 meta.setLuid("");
46 meta.setLang("EN");
47 meta.setSubject("");
48 meta.setType(getType().toString());
49 meta.setImageDocument(true);
50 meta.setCover(null);
51
52 return meta;
53 }
54
55 @Override
56 public Story process(URL url) throws IOException {
57 // There is no chapters on e621, just pagination...
58 Story story = super.process(url);
59
60 Chapter only = new Chapter(1, null);
61 for (Chapter chap : story) {
62 only.getParagraphs().addAll(chap.getParagraphs());
63 }
64
65 story.getChapters().clear();
66 story.getChapters().add(only);
67
68 return story;
69 }
70
71 @Override
72 protected boolean supports(URL url) {
73 String host = url.getHost();
74 if (host.startsWith("www.")) {
75 host = host.substring("www.".length());
76 }
77
78 return ("e621.net".equals(host) || "e926.net".equals(host))
79 && url.getPath().startsWith("/pool/");
80 }
81
82 @Override
83 protected boolean isHtml() {
84 return true;
85 }
86
87 private String getAuthor(URL source, InputStream in) throws IOException {
88 String author = getLine(in, "href=\"/post/show/", 0);
89 if (author != null) {
90 String key = "href=\"";
91 int pos = author.indexOf(key);
92 if (pos >= 0) {
93 author = author.substring(pos + key.length());
94 pos = author.indexOf("\"");
95 if (pos >= 0) {
96 author = author.substring(0, pos - 1);
97 String page = source.getProtocol() + "://"
98 + source.getHost() + author;
99 try {
100 InputStream pageIn = Instance.getCache().open(
101 new URL(page), this, false);
102 try {
103 key = "class=\"tag-type-artist\"";
104 author = getLine(pageIn, key, 0);
105 if (author != null) {
106 pos = author.indexOf("<a href=\"");
107 if (pos >= 0) {
108 author = author.substring(pos);
109 pos = author.indexOf("</a>");
110 if (pos >= 0) {
111 author = author.substring(0, pos);
112 return StringUtils.unhtml(author);
113 }
114 }
115 }
116 } finally {
117 pageIn.close();
118 }
119 } catch (Exception e) {
120 // No author found
121 }
122 }
123 }
124 }
125
126 return null;
127 }
128
129 private String getTitle(InputStream in) throws IOException {
130 String title = getLine(in, "<title>", 0);
131 if (title != null) {
132 int pos = title.indexOf('>');
133 if (pos >= 0) {
134 title = title.substring(pos + 1);
135 pos = title.indexOf('<');
136 if (pos >= 0) {
137 title = title.substring(0, pos);
138 }
139 }
140
141 if (title.startsWith("Pool:")) {
142 title = title.substring("Pool:".length());
143 }
144
145 title = StringUtils.unhtml(title).trim();
146 }
147
148 return title;
149 }
150
151 @Override
152 protected String getDesc(URL source, InputStream in) throws IOException {
153 String desc = getLine(in, "margin-bottom: 2em;", 0);
154
155 if (desc != null) {
156 StringBuilder builder = new StringBuilder();
157
158 boolean inTags = false;
159 for (char car : desc.toCharArray()) {
160 if ((inTags && car == '>') || (!inTags && car == '<')) {
161 inTags = !inTags;
162 }
163
164 if (inTags) {
165 builder.append(car);
166 }
167 }
168
169 return builder.toString().trim();
170 }
171
172 return null;
173 }
174
175 @Override
176 protected List<Entry<String, URL>> getChapters(URL source, InputStream in)
177 throws IOException {
178 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
179 int last = 1; // no pool/show when only one page
180
181 @SuppressWarnings("resource")
182 Scanner scan = new Scanner(in, "UTF-8");
183 scan.useDelimiter("\\n");
184 while (scan.hasNext()) {
185 String line = scan.next();
186 for (int pos = line.indexOf(source.getPath()); pos >= 0; pos = line
187 .indexOf(source.getPath(), pos + source.getPath().length())) {
188 int equalPos = line.indexOf("=", pos);
189 int quotePos = line.indexOf("\"", pos);
190 if (equalPos >= 0 && quotePos > equalPos) {
191 String snum = line.substring(equalPos + 1, quotePos);
192 try {
193 int num = Integer.parseInt(snum);
194 if (num > last) {
195 last = num;
196 }
197 } catch (NumberFormatException e) {
198 }
199 }
200 }
201 }
202
203 for (int i = 1; i <= last; i++) {
204 final String key = Integer.toString(i);
205 final URL value = new URL(source.toString() + "?page=" + i);
206 urls.add(new Entry<String, URL>() {
207 public URL setValue(URL value) {
208 return null;
209 }
210
211 public URL getValue() {
212 return value;
213 }
214
215 public String getKey() {
216 return key;
217 }
218 });
219 }
220
221 return urls;
222 }
223
224 @Override
225 protected String getChapterContent(URL source, InputStream in, int number)
226 throws IOException {
227 StringBuilder builder = new StringBuilder();
228 String staticSite = "https://static1.e621.net";
229 if (source.getHost().contains("e926")) {
230 staticSite = staticSite.replace("e621", "e926");
231 }
232
233 String key = staticSite + "/data/preview/";
234
235 @SuppressWarnings("resource")
236 Scanner scan = new Scanner(in, "UTF-8");
237 scan.useDelimiter("\\n");
238 while (scan.hasNext()) {
239 String line = scan.next();
240 if (line.contains("class=\"preview\"")) {
241 for (int pos = line.indexOf(key); pos >= 0; pos = line.indexOf(
242 key, pos + key.length())) {
243 int endPos = line.indexOf("\"", pos);
244 if (endPos >= 0) {
245 String id = line.substring(pos + key.length(), endPos);
246 id = staticSite + "/data/" + id;
247
248 int dotPos = id.lastIndexOf(".");
249 if (dotPos >= 0) {
250 id = id.substring(0, dotPos);
251 builder.append("[");
252 builder.append(id);
253 builder.append("]\n");
254 }
255 }
256 }
257 }
258 }
259
260 return builder.toString();
261 }
262 }