Commit | Line | Data |
---|---|---|
08fe2e33 NR |
1 | package be.nikiroo.fanfix.supported; |
2 | ||
08fe2e33 NR |
3 | import java.io.IOException; |
4 | import java.io.InputStream; | |
08fe2e33 | 5 | import java.net.URL; |
08fe2e33 | 6 | import java.util.ArrayList; |
793f1071 | 7 | import java.util.Date; |
08fe2e33 NR |
8 | import java.util.HashMap; |
9 | import java.util.List; | |
10 | import java.util.Map; | |
11 | import java.util.Map.Entry; | |
0ffa4754 NR |
12 | |
13 | import org.jsoup.helper.DataUtil; | |
14 | import org.jsoup.nodes.Document; | |
15 | import org.jsoup.nodes.Element; | |
16 | import org.jsoup.nodes.Node; | |
08fe2e33 NR |
17 | |
18 | import be.nikiroo.fanfix.Instance; | |
08fe2e33 NR |
19 | import be.nikiroo.fanfix.bundles.StringId; |
20 | import be.nikiroo.fanfix.data.Chapter; | |
21 | import be.nikiroo.fanfix.data.MetaData; | |
9252c65e | 22 | import be.nikiroo.fanfix.data.Story; |
3b2b638f | 23 | import be.nikiroo.utils.Progress; |
08fe2e33 NR |
24 | import be.nikiroo.utils.StringUtils; |
25 | ||
26 | /** | |
27 | * This class is the base class used by the other support classes. It can be | |
28 | * used outside of this package, and have static method that you can use to get | |
29 | * access to the correct support class. | |
30 | * <p> | |
31 | * It will be used with 'resources' (usually web pages or files). | |
32 | * | |
33 | * @author niki | |
34 | */ | |
35 | public abstract class BasicSupport { | |
0ffa4754 NR |
36 | private Document sourceNode; |
37 | private URL source; | |
08fe2e33 | 38 | private SupportType type; |
22848428 | 39 | private URL currentReferer; // with only one 'r', as in 'HTTP'... |
08fe2e33 | 40 | |
08fe2e33 NR |
41 | /** |
42 | * The name of this support class. | |
43 | * | |
44 | * @return the name | |
45 | */ | |
46 | protected abstract String getSourceName(); | |
47 | ||
48 | /** | |
49 | * Check if the given resource is supported by this {@link BasicSupport}. | |
50 | * | |
51 | * @param url | |
52 | * the resource to check for | |
53 | * | |
54 | * @return TRUE if it is | |
55 | */ | |
56 | protected abstract boolean supports(URL url); | |
57 | ||
58 | /** | |
59 | * Return TRUE if the support will return HTML encoded content values for | |
60 | * the chapters content. | |
61 | * | |
62 | * @return TRUE for HTML | |
63 | */ | |
64 | protected abstract boolean isHtml(); | |
65 | ||
0efd25e3 NR |
66 | /** |
67 | * Return the {@link MetaData} of this story. | |
68 | * | |
776ad3c6 | 69 | * @return the associated {@link MetaData}, never NULL |
0efd25e3 NR |
70 | * |
71 | * @throws IOException | |
72 | * in case of I/O error | |
73 | */ | |
0ffa4754 | 74 | protected abstract MetaData getMeta() throws IOException; |
08fe2e33 NR |
75 | |
76 | /** | |
77 | * Return the story description. | |
78 | * | |
08fe2e33 NR |
79 | * @return the description |
80 | * | |
81 | * @throws IOException | |
82 | * in case of I/O error | |
83 | */ | |
0ffa4754 | 84 | protected abstract String getDesc() throws IOException; |
08fe2e33 | 85 | |
08fe2e33 | 86 | /** |
0ffa4754 NR |
87 | * Return the list of chapters (name and resource). * |
88 | * <p> | |
89 | * Can be NULL if this {@link BasicSupport} do no use chapters. | |
08fe2e33 | 90 | * |
ed08c171 NR |
91 | * @param pg |
92 | * the optional progress reporter | |
08fe2e33 | 93 | * |
0ffa4754 | 94 | * @return the chapters or NULL |
08fe2e33 NR |
95 | * |
96 | * @throws IOException | |
97 | * in case of I/O error | |
98 | */ | |
0ffa4754 NR |
99 | protected abstract List<Entry<String, URL>> getChapters(Progress pg) |
100 | throws IOException; | |
08fe2e33 NR |
101 | |
102 | /** | |
103 | * Return the content of the chapter (possibly HTML encoded, if | |
104 | * {@link BasicSupport#isHtml()} is TRUE). | |
105 | * | |
0ffa4754 NR |
106 | * @param chapUrl |
107 | * the chapter {@link URL} | |
08fe2e33 NR |
108 | * @param number |
109 | * the chapter number | |
ed08c171 NR |
110 | * @param pg |
111 | * the optional progress reporter | |
08fe2e33 NR |
112 | * |
113 | * @return the content | |
114 | * | |
115 | * @throws IOException | |
116 | * in case of I/O error | |
117 | */ | |
0ffa4754 NR |
118 | protected abstract String getChapterContent(URL chapUrl, int number, |
119 | Progress pg) throws IOException; | |
6e06d2cc | 120 | |
08fe2e33 NR |
121 | /** |
122 | * Return the list of cookies (values included) that must be used to | |
123 | * correctly fetch the resources. | |
124 | * <p> | |
125 | * You are expected to call the super method implementation if you override | |
126 | * it. | |
127 | * | |
128 | * @return the cookies | |
129 | */ | |
315f14ae | 130 | public Map<String, String> getCookies() { |
08fe2e33 NR |
131 | return new HashMap<String, String>(); |
132 | } | |
133 | ||
315f14ae NR |
134 | /** |
135 | * OAuth authorisation (aka, "bearer XXXXXXX"). | |
136 | * | |
137 | * @return the OAuth string | |
138 | */ | |
139 | public String getOAuth() { | |
140 | return null; | |
141 | } | |
142 | ||
a4143cd7 NR |
143 | /** |
144 | * Return the canonical form of the main {@link URL}. | |
145 | * | |
146 | * @param source | |
0ffa4754 NR |
147 | * the source {@link URL}, which can be NULL |
148 | * | |
149 | * @return the canonical form of this {@link URL} or NULL if the source was | |
150 | * NULL | |
151 | */ | |
152 | protected URL getCanonicalUrl(URL source) { | |
153 | return source; | |
154 | } | |
155 | ||
156 | /** | |
157 | * The main {@link Node} for this {@link Story}. | |
158 | * | |
159 | * @return the node | |
160 | */ | |
161 | protected Element getSourceNode() { | |
162 | return sourceNode; | |
163 | } | |
164 | ||
165 | /** | |
166 | * The main {@link URL} for this {@link Story}. | |
167 | * | |
168 | * @return the URL | |
169 | */ | |
170 | protected URL getSource() { | |
171 | return source; | |
172 | } | |
173 | ||
174 | /** | |
175 | * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e., | |
176 | * the current {@link URL} we work on. | |
177 | * | |
178 | * @return the referer | |
179 | */ | |
180 | public URL getCurrentReferer() { | |
181 | return currentReferer; | |
182 | } | |
183 | ||
184 | /** | |
185 | * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e., | |
186 | * the current {@link URL} we work on. | |
187 | * | |
188 | * @param currentReferer | |
189 | * the new referer | |
190 | */ | |
191 | protected void setCurrentReferer(URL currentReferer) { | |
192 | this.currentReferer = currentReferer; | |
193 | } | |
194 | ||
195 | /** | |
196 | * The support type. | |
197 | * | |
198 | * @return the type | |
199 | */ | |
200 | public SupportType getType() { | |
201 | return type; | |
202 | } | |
203 | ||
204 | /** | |
205 | * The support type. | |
206 | * | |
207 | * @param type | |
208 | * the new type | |
209 | */ | |
210 | protected void setType(SupportType type) { | |
211 | this.type = type; | |
212 | } | |
213 | ||
214 | /** | |
215 | * Open an input link that will be used for the support. | |
216 | * <p> | |
217 | * Can return NULL, in which case you are supposed to work without an | |
218 | * {@link InputStream}. | |
219 | * | |
220 | * @param source | |
a4143cd7 NR |
221 | * the source {@link URL} |
222 | * | |
0ffa4754 NR |
223 | * @return the {@link InputStream} |
224 | * | |
225 | * @throws IOException | |
226 | * in case of I/O error | |
227 | */ | |
228 | protected Document loadDocument(URL source) throws IOException { | |
229 | String url = getCanonicalUrl(source).toString(); | |
230 | return DataUtil.load(Instance.getCache().open(source, this, false), | |
231 | "UTF-8", url.toString()); | |
232 | } | |
233 | ||
234 | /** | |
235 | * Log into the support (can be a no-op depending upon the support). | |
a4143cd7 NR |
236 | * |
237 | * @throws IOException | |
238 | * in case of I/O error | |
239 | */ | |
315f14ae | 240 | @SuppressWarnings("unused") |
0ffa4754 NR |
241 | protected void login() throws IOException { |
242 | } | |
243 | ||
244 | /** | |
245 | * Prepare the support if needed before processing. | |
246 | * | |
247 | * @throws IOException | |
248 | * on I/O error | |
249 | */ | |
250 | @SuppressWarnings("unused") | |
251 | protected void preprocess() throws IOException { | |
252 | } | |
253 | ||
254 | /** | |
255 | * Now that we have processed the {@link Story}, close the resources if any. | |
256 | */ | |
257 | protected void close() { | |
258 | setCurrentReferer(null); | |
a4143cd7 NR |
259 | } |
260 | ||
08fe2e33 NR |
261 | /** |
262 | * Process the given story resource into a partially filled {@link Story} | |
263 | * object containing the name and metadata, except for the description. | |
264 | * | |
08fe2e33 NR |
265 | * @return the {@link Story} |
266 | * | |
267 | * @throws IOException | |
268 | * in case of I/O error | |
269 | */ | |
0ffa4754 NR |
270 | public Story processMeta() throws IOException { |
271 | Story story = null; | |
272 | ||
273 | preprocess(); | |
274 | try { | |
275 | story = processMeta(false, null); | |
276 | } finally { | |
277 | close(); | |
278 | } | |
279 | ||
280 | return story; | |
08fe2e33 NR |
281 | } |
282 | ||
283 | /** | |
284 | * Process the given story resource into a partially filled {@link Story} | |
285 | * object containing the name and metadata. | |
286 | * | |
0efd25e3 NR |
287 | * @param getDesc |
288 | * retrieve the description of the story, or not | |
ed08c171 NR |
289 | * @param pg |
290 | * the optional progress reporter | |
08fe2e33 | 291 | * |
776ad3c6 | 292 | * @return the {@link Story}, never NULL |
08fe2e33 NR |
293 | * |
294 | * @throws IOException | |
295 | * in case of I/O error | |
296 | */ | |
0ffa4754 NR |
297 | protected Story processMeta(boolean getDesc, Progress pg) |
298 | throws IOException { | |
ed08c171 NR |
299 | if (pg == null) { |
300 | pg = new Progress(); | |
301 | } else { | |
302 | pg.setMinMax(0, 100); | |
303 | } | |
304 | ||
0ffa4754 | 305 | pg.setProgress(30); |
ed08c171 | 306 | |
0ffa4754 NR |
307 | Story story = new Story(); |
308 | MetaData meta = getMeta(); | |
309 | if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) { | |
310 | meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); | |
311 | } | |
312 | story.setMeta(meta); | |
ed08c171 | 313 | |
0ffa4754 | 314 | pg.setProgress(50); |
08fe2e33 | 315 | |
0ffa4754 NR |
316 | if (meta.getCover() == null) { |
317 | meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject())); | |
318 | } | |
08fe2e33 | 319 | |
0ffa4754 | 320 | pg.setProgress(60); |
a4143cd7 | 321 | |
0ffa4754 NR |
322 | if (getDesc) { |
323 | String descChapterName = Instance.getTrans().getString( | |
324 | StringId.DESCRIPTION); | |
325 | story.getMeta().setResume( | |
326 | BasicSupportPara.makeChapter(this, source, 0, | |
327 | descChapterName, // | |
328 | getDesc(), isHtml(), null)); | |
08fe2e33 | 329 | } |
0ffa4754 NR |
330 | |
331 | pg.setProgress(100); | |
332 | return story; | |
08fe2e33 NR |
333 | } |
334 | ||
335 | /** | |
336 | * Process the given story resource into a fully filled {@link Story} | |
337 | * object. | |
338 | * | |
92fb0719 NR |
339 | * @param pg |
340 | * the optional progress reporter | |
08fe2e33 | 341 | * |
776ad3c6 | 342 | * @return the {@link Story}, never NULL |
08fe2e33 NR |
343 | * |
344 | * @throws IOException | |
345 | * in case of I/O error | |
346 | */ | |
0ffa4754 | 347 | public Story process(Progress pg) throws IOException { |
92fb0719 NR |
348 | if (pg == null) { |
349 | pg = new Progress(); | |
350 | } else { | |
351 | pg.setMinMax(0, 100); | |
352 | } | |
353 | ||
0ffa4754 NR |
354 | setCurrentReferer(source); |
355 | login(); | |
356 | sourceNode = loadDocument(source); | |
357 | ||
92fb0719 | 358 | pg.setProgress(1); |
08fe2e33 | 359 | try { |
ed08c171 NR |
360 | Progress pgMeta = new Progress(); |
361 | pg.addProgress(pgMeta, 10); | |
0ffa4754 NR |
362 | preprocess(); |
363 | Story story = processMeta(true, pgMeta); | |
ed08c171 NR |
364 | if (!pgMeta.isDone()) { |
365 | pgMeta.setProgress(pgMeta.getMax()); // 10% | |
366 | } | |
367 | ||
754a5bc2 NR |
368 | pg.setName("Retrieving " + story.getMeta().getTitle()); |
369 | ||
ed08c171 NR |
370 | Progress pgGetChapters = new Progress(); |
371 | pg.addProgress(pgGetChapters, 10); | |
08fe2e33 | 372 | story.setChapters(new ArrayList<Chapter>()); |
0ffa4754 | 373 | List<Entry<String, URL>> chapters = getChapters(pgGetChapters); |
ed08c171 NR |
374 | if (!pgGetChapters.isDone()) { |
375 | pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% | |
376 | } | |
08fe2e33 | 377 | |
08fe2e33 | 378 | if (chapters != null) { |
ed08c171 NR |
379 | Progress pgChaps = new Progress("Extracting chapters", 0, |
380 | chapters.size() * 300); | |
92fb0719 NR |
381 | pg.addProgress(pgChaps, 80); |
382 | ||
793f1071 | 383 | long words = 0; |
ed08c171 | 384 | int i = 1; |
08fe2e33 | 385 | for (Entry<String, URL> chap : chapters) { |
ed08c171 | 386 | pgChaps.setName("Extracting chapter " + i); |
0ffa4754 NR |
387 | URL chapUrl = chap.getValue(); |
388 | String chapName = chap.getKey(); | |
389 | if (chapUrl != null) { | |
390 | setCurrentReferer(chapUrl); | |
315f14ae | 391 | } |
ed08c171 | 392 | |
0ffa4754 NR |
393 | pgChaps.setProgress(i * 100); |
394 | Progress pgGetChapterContent = new Progress(); | |
395 | Progress pgMakeChapter = new Progress(); | |
396 | pgChaps.addProgress(pgGetChapterContent, 100); | |
397 | pgChaps.addProgress(pgMakeChapter, 100); | |
398 | ||
399 | String content = getChapterContent(chapUrl, i, | |
400 | pgGetChapterContent); | |
401 | if (!pgGetChapterContent.isDone()) { | |
402 | pgGetChapterContent.setProgress(pgGetChapterContent | |
403 | .getMax()); | |
404 | } | |
ed08c171 | 405 | |
0ffa4754 NR |
406 | Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i, |
407 | chapName, content, isHtml(), pgMakeChapter); | |
408 | if (!pgMakeChapter.isDone()) { | |
409 | pgMakeChapter.setProgress(pgMakeChapter.getMax()); | |
08fe2e33 | 410 | } |
a6395bef | 411 | |
0ffa4754 NR |
412 | words += cc.getWords(); |
413 | story.getChapters().add(cc); | |
414 | story.getMeta().setWords(words); | |
415 | ||
ed08c171 | 416 | i++; |
08fe2e33 | 417 | } |
ed08c171 NR |
418 | |
419 | pgChaps.setName("Extracting chapters"); | |
92fb0719 | 420 | } else { |
ed08c171 | 421 | pg.setProgress(80); |
08fe2e33 NR |
422 | } |
423 | ||
424 | return story; | |
08fe2e33 | 425 | } finally { |
0ffa4754 | 426 | close(); |
08fe2e33 NR |
427 | } |
428 | } | |
429 | ||
430 | /** | |
0ffa4754 NR |
431 | * Return a {@link BasicSupport} implementation supporting the given |
432 | * resource if possible. | |
08fe2e33 | 433 | * |
0ffa4754 NR |
434 | * @param url |
435 | * the story resource | |
08fe2e33 | 436 | * |
0ffa4754 | 437 | * @return an implementation that supports it, or NULL |
08fe2e33 | 438 | */ |
0ffa4754 NR |
439 | public static BasicSupport getSupport(URL url) { |
440 | if (url == null) { | |
441 | return null; | |
442 | } | |
08fe2e33 | 443 | |
0ffa4754 NR |
444 | // TEXT and INFO_TEXT always support files (not URLs though) |
445 | for (SupportType type : SupportType.values()) { | |
446 | if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) { | |
447 | BasicSupport support = getSupport(type, url); | |
448 | if (support != null && support.supports(url)) { | |
449 | return support; | |
450 | } | |
451 | } | |
452 | } | |
08fe2e33 | 453 | |
0ffa4754 NR |
454 | for (SupportType type : new SupportType[] { SupportType.INFO_TEXT, |
455 | SupportType.TEXT }) { | |
456 | BasicSupport support = getSupport(type, url); | |
457 | if (support != null && support.supports(url)) { | |
458 | return support; | |
459 | } | |
460 | } | |
461 | ||
462 | return null; | |
08fe2e33 NR |
463 | } |
464 | ||
465 | /** | |
0ffa4754 | 466 | * Return a {@link BasicSupport} implementation supporting the given type. |
08fe2e33 | 467 | * |
0ffa4754 NR |
468 | * @param type |
469 | * the type | |
470 | * @param url | |
471 | * the {@link URL} to support (can be NULL to get an | |
472 | * "abstract support") | |
08fe2e33 | 473 | * |
0ffa4754 | 474 | * @return an implementation that supports it, or NULL |
08fe2e33 | 475 | */ |
0ffa4754 NR |
476 | public static BasicSupport getSupport(SupportType type, URL url) { |
477 | BasicSupport support = null; | |
08fe2e33 | 478 | |
08fe2e33 NR |
479 | switch (type) { |
480 | case EPUB: | |
0ffa4754 NR |
481 | support = new Epub(); |
482 | break; | |
08fe2e33 | 483 | case INFO_TEXT: |
0ffa4754 NR |
484 | support = new InfoText(); |
485 | break; | |
08fe2e33 | 486 | case FIMFICTION: |
315f14ae NR |
487 | try { |
488 | // Can fail if no client key or NO in options | |
0ffa4754 | 489 | support = new FimfictionApi(); |
315f14ae | 490 | } catch (IOException e) { |
0ffa4754 | 491 | support = new Fimfiction(); |
315f14ae | 492 | } |
0ffa4754 | 493 | break; |
08fe2e33 | 494 | case FANFICTION: |
0ffa4754 NR |
495 | support = new Fanfiction(); |
496 | break; | |
08fe2e33 | 497 | case TEXT: |
0ffa4754 NR |
498 | support = new Text(); |
499 | break; | |
08fe2e33 | 500 | case MANGAFOX: |
0ffa4754 NR |
501 | support = new MangaFox(); |
502 | break; | |
08fe2e33 | 503 | case E621: |
0ffa4754 NR |
504 | support = new E621(); |
505 | break; | |
a4143cd7 | 506 | case YIFFSTAR: |
0ffa4754 NR |
507 | support = new YiffStar(); |
508 | break; | |
f0608ab1 | 509 | case E_HENTAI: |
0ffa4754 NR |
510 | support = new EHentai(); |
511 | break; | |
08fe2e33 | 512 | case CBZ: |
0ffa4754 NR |
513 | support = new Cbz(); |
514 | break; | |
373da363 | 515 | case HTML: |
0ffa4754 NR |
516 | support = new Html(); |
517 | break; | |
68686a37 NR |
518 | } |
519 | ||
0ffa4754 NR |
520 | if (support != null) { |
521 | support.setType(type); | |
522 | support.source = support.getCanonicalUrl(url); | |
315f14ae NR |
523 | } |
524 | ||
0ffa4754 | 525 | return support; |
315f14ae | 526 | } |
08fe2e33 | 527 | } |