Commit | Line | Data |
---|---|---|
08fe2e33 NR |
1 | package be.nikiroo.fanfix.supported; |
2 | ||
3 | import java.io.IOException; | |
4 | import java.io.InputStream; | |
5 | import java.net.MalformedURLException; | |
6 | import java.net.URL; | |
7 | import java.text.SimpleDateFormat; | |
ce297a79 | 8 | import java.util.AbstractMap; |
08fe2e33 NR |
9 | import java.util.ArrayList; |
10 | import java.util.Date; | |
11 | import java.util.List; | |
12 | import java.util.Map.Entry; | |
13 | import java.util.Scanner; | |
14 | ||
15 | import be.nikiroo.fanfix.Instance; | |
22848428 | 16 | import be.nikiroo.fanfix.bundles.Config; |
68686a37 | 17 | import be.nikiroo.fanfix.data.MetaData; |
16a81ef7 | 18 | import be.nikiroo.utils.Image; |
ed08c171 | 19 | import be.nikiroo.utils.Progress; |
08fe2e33 NR |
20 | import be.nikiroo.utils.StringUtils; |
21 | ||
22 | /** | |
23 | * Support class for <a href="http://www.fanfiction.net/">Faniction.net</a> | |
24 | * stories, a website dedicated to fanfictions of many, many different | |
25 | * universes, from TV shows to novels to games. | |
26 | * | |
27 | * @author niki | |
28 | */ | |
0ffa4754 | 29 | class Fanfiction extends BasicSupport_Deprecated { |
08fe2e33 NR |
30 | @Override |
31 | protected boolean isHtml() { | |
32 | return true; | |
33 | } | |
34 | ||
08fe2e33 | 35 | @Override |
68686a37 NR |
36 | protected MetaData getMeta(URL source, InputStream in) throws IOException { |
37 | MetaData meta = new MetaData(); | |
38 | ||
39 | meta.setTitle(getTitle(reset(in))); | |
40 | meta.setAuthor(getAuthor(reset(in))); | |
41 | meta.setDate(getDate(reset(in))); | |
42 | meta.setTags(getTags(reset(in))); | |
727108fe | 43 | meta.setSource(getType().getSourceName()); |
2206ef66 | 44 | meta.setUrl(source.toString()); |
727108fe | 45 | meta.setPublisher(getType().getSourceName()); |
68686a37 NR |
46 | meta.setUuid(source.toString()); |
47 | meta.setLuid(""); | |
17625a9f | 48 | meta.setLang("en"); // TODO find language of book |
68686a37 NR |
49 | meta.setSubject(getSubject(reset(in))); |
50 | meta.setType(getType().toString()); | |
51 | meta.setImageDocument(false); | |
52 | meta.setCover(getCover(source, reset(in))); | |
53 | ||
54 | return meta; | |
55 | } | |
56 | ||
57 | private String getSubject(InputStream in) { | |
08fe2e33 NR |
58 | String line = getLine(in, "id=pre_story_links", 0); |
59 | if (line != null) { | |
60 | int pos = line.lastIndexOf('"'); | |
61 | if (pos >= 1) { | |
62 | line = line.substring(pos + 1); | |
63 | pos = line.indexOf('<'); | |
64 | if (pos >= 0) { | |
68686a37 | 65 | return StringUtils.unhtml(line.substring(0, pos)).trim(); |
08fe2e33 NR |
66 | } |
67 | } | |
68 | } | |
69 | ||
70 | return null; | |
71 | } | |
72 | ||
211f7ddb | 73 | private List<String> getTags(InputStream in) { |
68686a37 | 74 | List<String> tags = new ArrayList<String>(); |
08fe2e33 NR |
75 | |
76 | String key = "title=\"Send Private Message\""; | |
77 | String line = getLine(in, key, 2); | |
78 | if (line != null) { | |
79 | key = "Rated:"; | |
80 | int pos = line.indexOf(key); | |
81 | if (pos >= 0) { | |
82 | line = line.substring(pos + key.length()); | |
83 | key = "Chapters:"; | |
84 | pos = line.indexOf(key); | |
85 | if (pos >= 0) { | |
86 | line = line.substring(0, pos); | |
87 | line = StringUtils.unhtml(line).trim(); | |
88 | if (line.endsWith("-")) { | |
89 | line = line.substring(0, line.length() - 1); | |
90 | } | |
91 | ||
92 | for (String tag : line.split("-")) { | |
68686a37 | 93 | tags.add(StringUtils.unhtml(tag).trim()); |
08fe2e33 NR |
94 | } |
95 | } | |
96 | } | |
97 | } | |
98 | ||
99 | return tags; | |
100 | } | |
101 | ||
68686a37 | 102 | private String getTitle(InputStream in) { |
08fe2e33 NR |
103 | int i = 0; |
104 | @SuppressWarnings("resource") | |
105 | Scanner scan = new Scanner(in, "UTF-8"); | |
106 | scan.useDelimiter("\\n"); | |
107 | while (scan.hasNext()) { | |
108 | String line = scan.next(); | |
109 | if (line.contains("xcontrast_txt")) { | |
110 | if ((++i) == 2) { | |
111 | line = StringUtils.unhtml(line).trim(); | |
112 | if (line.startsWith("Follow/Fav")) { | |
113 | line = line.substring("Follow/Fav".length()).trim(); | |
114 | } | |
115 | ||
68686a37 | 116 | return StringUtils.unhtml(line).trim(); |
08fe2e33 NR |
117 | } |
118 | } | |
119 | } | |
120 | ||
96f0625f | 121 | return ""; |
08fe2e33 NR |
122 | } |
123 | ||
68686a37 | 124 | private String getAuthor(InputStream in) { |
b4dc6ab5 NR |
125 | String author = null; |
126 | ||
08fe2e33 NR |
127 | int i = 0; |
128 | @SuppressWarnings("resource") | |
129 | Scanner scan = new Scanner(in, "UTF-8"); | |
130 | scan.useDelimiter("\\n"); | |
131 | while (scan.hasNext()) { | |
132 | String line = scan.next(); | |
133 | if (line.contains("xcontrast_txt")) { | |
134 | if ((++i) == 3) { | |
b4dc6ab5 NR |
135 | author = StringUtils.unhtml(line).trim(); |
136 | break; | |
08fe2e33 NR |
137 | } |
138 | } | |
139 | } | |
140 | ||
8d59ce07 | 141 | return bsHelper.fixAuthor(author); |
08fe2e33 NR |
142 | } |
143 | ||
68686a37 | 144 | private String getDate(InputStream in) { |
08fe2e33 NR |
145 | String key = "Published: <span data-xutime='"; |
146 | String line = getLine(in, key, 0); | |
147 | if (line != null) { | |
148 | int pos = line.indexOf(key); | |
149 | if (pos >= 0) { | |
150 | line = line.substring(pos + key.length()); | |
151 | pos = line.indexOf('\''); | |
152 | if (pos >= 0) { | |
153 | line = line.substring(0, pos).trim(); | |
154 | try { | |
155 | SimpleDateFormat sdf = new SimpleDateFormat( | |
84754696 | 156 | "yyyy-MM-dd"); |
08fe2e33 NR |
157 | return sdf |
158 | .format(new Date(1000 * Long.parseLong(line))); | |
159 | } catch (NumberFormatException e) { | |
d66deb8d NR |
160 | Instance.getInstance().getTraceHandler() |
161 | .error(new IOException("Cannot convert publication date: " + line, e)); | |
08fe2e33 NR |
162 | } |
163 | } | |
164 | } | |
165 | } | |
166 | ||
167 | return null; | |
168 | } | |
169 | ||
170 | @Override | |
171 | protected String getDesc(URL source, InputStream in) { | |
172 | return getLine(in, "title=\"Send Private Message\"", 1); | |
173 | } | |
174 | ||
16a81ef7 | 175 | private Image getCover(URL url, InputStream in) { |
08fe2e33 NR |
176 | String key = "class='cimage"; |
177 | String line = getLine(in, key, 0); | |
178 | if (line != null) { | |
179 | int pos = line.indexOf(key); | |
180 | if (pos >= 0) { | |
181 | line = line.substring(pos + key.length()); | |
182 | key = "src='"; | |
183 | pos = line.indexOf(key); | |
184 | if (pos >= 0) { | |
185 | line = line.substring(pos + key.length()); | |
186 | pos = line.indexOf('\''); | |
187 | if (pos >= 0) { | |
188 | line = line.substring(0, pos); | |
189 | if (line.startsWith("//")) { | |
190 | line = url.getProtocol() + "://" | |
191 | + line.substring(2); | |
192 | } else if (line.startsWith("//")) { | |
193 | line = url.getProtocol() + "://" + url.getHost() | |
194 | + "/" + line.substring(1); | |
195 | } else { | |
196 | line = url.getProtocol() + "://" + url.getHost() | |
197 | + "/" + url.getPath() + "/" + line; | |
198 | } | |
199 | ||
333f0e7b | 200 | return getImage(this, null, line); |
08fe2e33 NR |
201 | } |
202 | } | |
203 | } | |
204 | } | |
205 | ||
206 | return null; | |
207 | } | |
208 | ||
209 | @Override | |
ed08c171 NR |
210 | protected List<Entry<String, URL>> getChapters(URL source, InputStream in, |
211 | Progress pg) { | |
08fe2e33 NR |
212 | List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>(); |
213 | ||
214 | String base = source.toString(); | |
215 | int pos = base.lastIndexOf('/'); | |
216 | String suffix = base.substring(pos); // including '/' at start | |
217 | base = base.substring(0, pos); | |
218 | if (base.endsWith("/1")) { | |
219 | base = base.substring(0, base.length() - 1); // including '/' at end | |
220 | } | |
221 | ||
222 | String line = getLine(in, "id=chap_select", 0); | |
223 | String key = "<option value="; | |
224 | int i = 1; | |
08fe2e33 | 225 | |
333f0e7b NR |
226 | if (line != null) { |
227 | for (pos = line.indexOf(key); pos >= 0; pos = line | |
228 | .indexOf(key, pos), i++) { | |
229 | pos = line.indexOf('>', pos); | |
230 | if (pos >= 0) { | |
231 | int endOfName = line.indexOf('<', pos); | |
232 | if (endOfName >= 0) { | |
233 | String name = line.substring(pos + 1, endOfName); | |
234 | String chapNum = i + "."; | |
235 | if (name.startsWith(chapNum)) { | |
236 | name = name.substring(chapNum.length(), | |
237 | name.length()); | |
238 | } | |
239 | ||
240 | try { | |
ce297a79 NR |
241 | urls.add(new AbstractMap.SimpleEntry<String, URL>( |
242 | name.trim(), new URL(base + i + suffix))); | |
333f0e7b | 243 | } catch (MalformedURLException e) { |
d66deb8d NR |
244 | Instance.getInstance().getTraceHandler().error( |
245 | new IOException("Cannot parse chapter " + i + " url: " + (base + i + suffix), e)); | |
333f0e7b | 246 | } |
08fe2e33 NR |
247 | } |
248 | } | |
249 | } | |
333f0e7b NR |
250 | } else { |
251 | // only one chapter: | |
252 | final String chapName = getTitle(reset(in)); | |
253 | final URL chapURL = source; | |
254 | urls.add(new Entry<String, URL>() { | |
211f7ddb | 255 | @Override |
333f0e7b NR |
256 | public URL setValue(URL value) { |
257 | return null; | |
258 | } | |
259 | ||
211f7ddb | 260 | @Override |
333f0e7b NR |
261 | public URL getValue() { |
262 | return chapURL; | |
263 | } | |
264 | ||
211f7ddb | 265 | @Override |
333f0e7b NR |
266 | public String getKey() { |
267 | return chapName; | |
268 | } | |
269 | }); | |
08fe2e33 NR |
270 | } |
271 | ||
272 | return urls; | |
273 | } | |
274 | ||
275 | @Override | |
ed08c171 NR |
276 | protected String getChapterContent(URL source, InputStream in, int number, |
277 | Progress pg) { | |
08fe2e33 NR |
278 | StringBuilder builder = new StringBuilder(); |
279 | String startAt = "class='storytext "; | |
280 | String endAt1 = "function review_init"; | |
281 | String endAt2 = "id=chap_select"; | |
282 | boolean ok = false; | |
283 | ||
284 | @SuppressWarnings("resource") | |
285 | Scanner scan = new Scanner(in, "UTF-8"); | |
286 | scan.useDelimiter("\\n"); | |
287 | while (scan.hasNext()) { | |
288 | String line = scan.next(); | |
289 | if (!ok && line.contains(startAt)) { | |
290 | ok = true; | |
291 | } else if (ok && (line.contains(endAt1) || line.contains(endAt2))) { | |
292 | ok = false; | |
293 | break; | |
294 | } | |
295 | ||
296 | if (ok) { | |
297 | // First line may contain the title and chap name again | |
298 | if (builder.length() == 0) { | |
299 | int pos = line.indexOf("<hr"); | |
300 | if (pos >= 0) { | |
22848428 | 301 | boolean chaptered = false; |
d66deb8d NR |
302 | for (String lang : Instance.getInstance().getConfig().getList(Config.CONF_CHAPTER)) { |
303 | String chapterWord = Instance.getInstance().getConfig().getStringX(Config.CONF_CHAPTER, | |
304 | lang); | |
22848428 NR |
305 | int posChap = line.indexOf(chapterWord + " "); |
306 | if (posChap < pos) { | |
307 | chaptered = true; | |
308 | break; | |
309 | } | |
310 | } | |
311 | ||
312 | if (chaptered) { | |
313 | line = line.substring(pos); | |
314 | } | |
08fe2e33 NR |
315 | } |
316 | } | |
317 | ||
318 | builder.append(line); | |
406447a4 | 319 | builder.append(' '); |
08fe2e33 NR |
320 | } |
321 | } | |
322 | ||
323 | return builder.toString(); | |
324 | } | |
325 | ||
326 | @Override | |
327 | protected boolean supports(URL url) { | |
328 | return "fanfiction.net".equals(url.getHost()) | |
329 | || "www.fanfiction.net".equals(url.getHost()); | |
330 | } | |
331 | } |