Library scanning much quicker
[fanfix.git] / src / be / nikiroo / fanfix / supported / Fanfiction.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.net.MalformedURLException;
7 import java.net.URL;
8 import java.text.SimpleDateFormat;
9 import java.util.ArrayList;
10 import java.util.Date;
11 import java.util.List;
12 import java.util.Map.Entry;
13 import java.util.Scanner;
14
15 import be.nikiroo.fanfix.Instance;
16 import be.nikiroo.fanfix.data.MetaData;
17 import be.nikiroo.utils.StringUtils;
18
19 /**
20 * Support class for <a href="http://www.fanfiction.net/">Fanfiction.net</a>
21 * stories, a website dedicated to fanfictions of many, many different
22 * universes, from TV shows to novels to games.
23 *
24 * @author niki
25 */
26 class Fanfiction extends BasicSupport {
27 @Override
28 protected boolean isHtml() {
29 return true;
30 }
31
32 @Override
33 public String getSourceName() {
34 return "Fanfiction.net";
35 }
36
37 @Override
38 protected MetaData getMeta(URL source, InputStream in) throws IOException {
39 MetaData meta = new MetaData();
40
41 meta.setTitle(getTitle(reset(in)));
42 meta.setAuthor(getAuthor(reset(in)));
43 meta.setDate(getDate(reset(in)));
44 meta.setTags(getTags(reset(in)));
45 meta.setSource(getSourceName());
46 meta.setPublisher(getSourceName());
47 meta.setUuid(source.toString());
48 meta.setLuid("");
49 meta.setLang("EN");
50 meta.setSubject(getSubject(reset(in)));
51 meta.setType(getType().toString());
52 meta.setImageDocument(false);
53 meta.setCover(getCover(source, reset(in)));
54
55 return meta;
56 }
57
58 private String getSubject(InputStream in) {
59 String line = getLine(in, "id=pre_story_links", 0);
60 if (line != null) {
61 int pos = line.lastIndexOf('"');
62 if (pos >= 1) {
63 line = line.substring(pos + 1);
64 pos = line.indexOf('<');
65 if (pos >= 0) {
66 return StringUtils.unhtml(line.substring(0, pos)).trim();
67 }
68 }
69 }
70
71 return null;
72 }
73
74 private List<String> getTags(InputStream in) throws IOException {
75 List<String> tags = new ArrayList<String>();
76
77 String key = "title=\"Send Private Message\"";
78 String line = getLine(in, key, 2);
79 if (line != null) {
80 key = "Rated:";
81 int pos = line.indexOf(key);
82 if (pos >= 0) {
83 line = line.substring(pos + key.length());
84 key = "Chapters:";
85 pos = line.indexOf(key);
86 if (pos >= 0) {
87 line = line.substring(0, pos);
88 line = StringUtils.unhtml(line).trim();
89 if (line.endsWith("-")) {
90 line = line.substring(0, line.length() - 1);
91 }
92
93 for (String tag : line.split("-")) {
94 tags.add(StringUtils.unhtml(tag).trim());
95 }
96 }
97 }
98 }
99
100 return tags;
101 }
102
103 private String getTitle(InputStream in) {
104 int i = 0;
105 @SuppressWarnings("resource")
106 Scanner scan = new Scanner(in, "UTF-8");
107 scan.useDelimiter("\\n");
108 while (scan.hasNext()) {
109 String line = scan.next();
110 if (line.contains("xcontrast_txt")) {
111 if ((++i) == 2) {
112 line = StringUtils.unhtml(line).trim();
113 if (line.startsWith("Follow/Fav")) {
114 line = line.substring("Follow/Fav".length()).trim();
115 }
116
117 return StringUtils.unhtml(line).trim();
118 }
119 }
120 }
121
122 return null;
123 }
124
125 private String getAuthor(InputStream in) {
126 int i = 0;
127 @SuppressWarnings("resource")
128 Scanner scan = new Scanner(in, "UTF-8");
129 scan.useDelimiter("\\n");
130 while (scan.hasNext()) {
131 String line = scan.next();
132 if (line.contains("xcontrast_txt")) {
133 if ((++i) == 3) {
134 return StringUtils.unhtml(line).trim();
135 }
136 }
137 }
138
139 return null;
140 }
141
142 private String getDate(InputStream in) {
143 String key = "Published: <span data-xutime='";
144 String line = getLine(in, key, 0);
145 if (line != null) {
146 int pos = line.indexOf(key);
147 if (pos >= 0) {
148 line = line.substring(pos + key.length());
149 pos = line.indexOf('\'');
150 if (pos >= 0) {
151 line = line.substring(0, pos).trim();
152 try {
153 SimpleDateFormat sdf = new SimpleDateFormat(
154 "YYYY-MM-dd");
155 return sdf
156 .format(new Date(1000 * Long.parseLong(line)));
157 } catch (NumberFormatException e) {
158 Instance.syserr(new IOException(
159 "Cannot convert publication date: " + line, e));
160 }
161 }
162 }
163 }
164
165 return null;
166 }
167
168 @Override
169 protected String getDesc(URL source, InputStream in) {
170 return getLine(in, "title=\"Send Private Message\"", 1);
171 }
172
173 private BufferedImage getCover(URL url, InputStream in) {
174 String key = "class='cimage";
175 String line = getLine(in, key, 0);
176 if (line != null) {
177 int pos = line.indexOf(key);
178 if (pos >= 0) {
179 line = line.substring(pos + key.length());
180 key = "src='";
181 pos = line.indexOf(key);
182 if (pos >= 0) {
183 line = line.substring(pos + key.length());
184 pos = line.indexOf('\'');
185 if (pos >= 0) {
186 line = line.substring(0, pos);
187 if (line.startsWith("//")) {
188 line = url.getProtocol() + "://"
189 + line.substring(2);
190 } else if (line.startsWith("//")) {
191 line = url.getProtocol() + "://" + url.getHost()
192 + "/" + line.substring(1);
193 } else {
194 line = url.getProtocol() + "://" + url.getHost()
195 + "/" + url.getPath() + "/" + line;
196 }
197
198 return getImage(null, line);
199 }
200 }
201 }
202 }
203
204 return null;
205 }
206
207 @Override
208 protected List<Entry<String, URL>> getChapters(URL source, InputStream in) {
209 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
210
211 String base = source.toString();
212 int pos = base.lastIndexOf('/');
213 String suffix = base.substring(pos); // including '/' at start
214 base = base.substring(0, pos);
215 if (base.endsWith("/1")) {
216 base = base.substring(0, base.length() - 1); // including '/' at end
217 }
218
219 String line = getLine(in, "id=chap_select", 0);
220 String key = "<option value=";
221 int i = 1;
222 for (pos = line.indexOf(key); pos >= 0; pos = line.indexOf(key, pos), i++) {
223 pos = line.indexOf('>', pos);
224 if (pos >= 0) {
225 int endOfName = line.indexOf('<', pos);
226 if (endOfName >= 0) {
227 String name = line.substring(pos + 1, endOfName);
228 String chapNum = i + ".";
229 if (name.startsWith(chapNum)) {
230 name = name.substring(chapNum.length(), name.length());
231 }
232
233 try {
234 final String chapName = name.trim();
235 final URL chapURL = new URL(base + i + suffix);
236 urls.add(new Entry<String, URL>() {
237 public URL setValue(URL value) {
238 return null;
239 }
240
241 public URL getValue() {
242 return chapURL;
243 }
244
245 public String getKey() {
246 return chapName;
247 }
248 });
249 } catch (MalformedURLException e) {
250 Instance.syserr(new IOException("Cannot parse chapter "
251 + i + " url: " + (base + i + suffix), e));
252 }
253 }
254 }
255 }
256
257 return urls;
258 }
259
260 @Override
261 protected String getChapterContent(URL source, InputStream in, int number) {
262 StringBuilder builder = new StringBuilder();
263 String startAt = "class='storytext ";
264 String endAt1 = "function review_init";
265 String endAt2 = "id=chap_select";
266 boolean ok = false;
267
268 @SuppressWarnings("resource")
269 Scanner scan = new Scanner(in, "UTF-8");
270 scan.useDelimiter("\\n");
271 while (scan.hasNext()) {
272 String line = scan.next();
273 if (!ok && line.contains(startAt)) {
274 ok = true;
275 } else if (ok && (line.contains(endAt1) || line.contains(endAt2))) {
276 ok = false;
277 break;
278 }
279
280 if (ok) {
281 // First line may contain the title and chap name again
282 if (builder.length() == 0) {
283 int pos = line.indexOf("<hr");
284 if (pos >= 0) {
285 line = line.substring(pos);
286 }
287 }
288
289 builder.append(line);
290 }
291 }
292
293 return builder.toString();
294 }
295
296 @Override
297 protected boolean supports(URL url) {
298 return "fanfiction.net".equals(url.getHost())
299 || "www.fanfiction.net".equals(url.getHost());
300 }
301 }