Commit | Line | Data |
---|---|---|
08fe2e33 NR |
1 | package be.nikiroo.fanfix.supported; |
2 | ||
08fe2e33 NR |
3 | import java.io.IOException; |
4 | import java.io.InputStream; | |
08fe2e33 | 5 | import java.net.URL; |
08fe2e33 | 6 | import java.util.ArrayList; |
793f1071 | 7 | import java.util.Date; |
08fe2e33 NR |
8 | import java.util.HashMap; |
9 | import java.util.List; | |
10 | import java.util.Map; | |
11 | import java.util.Map.Entry; | |
0ffa4754 NR |
12 | |
13 | import org.jsoup.helper.DataUtil; | |
14 | import org.jsoup.nodes.Document; | |
15 | import org.jsoup.nodes.Element; | |
16 | import org.jsoup.nodes.Node; | |
08fe2e33 NR |
17 | |
18 | import be.nikiroo.fanfix.Instance; | |
08fe2e33 NR |
19 | import be.nikiroo.fanfix.bundles.StringId; |
20 | import be.nikiroo.fanfix.data.Chapter; | |
21 | import be.nikiroo.fanfix.data.MetaData; | |
9252c65e | 22 | import be.nikiroo.fanfix.data.Story; |
3b2b638f | 23 | import be.nikiroo.utils.Progress; |
08fe2e33 NR |
24 | import be.nikiroo.utils.StringUtils; |
25 | ||
26 | /** | |
27 | * This class is the base class used by the other support classes. It can be | |
28 | * used outside of this package, and have static method that you can use to get | |
29 | * access to the correct support class. | |
30 | * <p> | |
31 | * It will be used with 'resources' (usually web pages or files). | |
32 | * | |
33 | * @author niki | |
34 | */ | |
35 | public abstract class BasicSupport { | |
0ffa4754 NR |
36 | private Document sourceNode; |
37 | private URL source; | |
08fe2e33 | 38 | private SupportType type; |
22848428 | 39 | private URL currentReferer; // with only one 'r', as in 'HTTP'... |
8d59ce07 NR |
40 | |
41 | static protected BasicSupportHelper bsHelper = new BasicSupportHelper(); | |
42 | static protected BasicSupportImages bsImages = new BasicSupportImages(); | |
43 | static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages()); | |
08fe2e33 | 44 | |
08fe2e33 NR |
45 | /** |
46 | * Check if the given resource is supported by this {@link BasicSupport}. | |
47 | * | |
48 | * @param url | |
49 | * the resource to check for | |
50 | * | |
51 | * @return TRUE if it is | |
52 | */ | |
53 | protected abstract boolean supports(URL url); | |
54 | ||
55 | /** | |
56 | * Return TRUE if the support will return HTML encoded content values for | |
57 | * the chapters content. | |
58 | * | |
59 | * @return TRUE for HTML | |
60 | */ | |
61 | protected abstract boolean isHtml(); | |
62 | ||
0efd25e3 NR |
63 | /** |
64 | * Return the {@link MetaData} of this story. | |
65 | * | |
776ad3c6 | 66 | * @return the associated {@link MetaData}, never NULL |
0efd25e3 NR |
67 | * |
68 | * @throws IOException | |
69 | * in case of I/O error | |
70 | */ | |
0ffa4754 | 71 | protected abstract MetaData getMeta() throws IOException; |
08fe2e33 NR |
72 | |
73 | /** | |
74 | * Return the story description. | |
75 | * | |
08fe2e33 NR |
76 | * @return the description |
77 | * | |
78 | * @throws IOException | |
79 | * in case of I/O error | |
80 | */ | |
0ffa4754 | 81 | protected abstract String getDesc() throws IOException; |
08fe2e33 | 82 | |
08fe2e33 | 83 | /** |
826e4569 | 84 | * Return the list of chapters (name and resource). |
0ffa4754 NR |
85 | * <p> |
86 | * Can be NULL if this {@link BasicSupport} do no use chapters. | |
08fe2e33 | 87 | * |
ed08c171 NR |
88 | * @param pg |
89 | * the optional progress reporter | |
08fe2e33 | 90 | * |
0ffa4754 | 91 | * @return the chapters or NULL |
08fe2e33 NR |
92 | * |
93 | * @throws IOException | |
94 | * in case of I/O error | |
95 | */ | |
0ffa4754 NR |
96 | protected abstract List<Entry<String, URL>> getChapters(Progress pg) |
97 | throws IOException; | |
08fe2e33 NR |
98 | |
99 | /** | |
100 | * Return the content of the chapter (possibly HTML encoded, if | |
101 | * {@link BasicSupport#isHtml()} is TRUE). | |
102 | * | |
0ffa4754 NR |
103 | * @param chapUrl |
104 | * the chapter {@link URL} | |
08fe2e33 NR |
105 | * @param number |
106 | * the chapter number | |
ed08c171 NR |
107 | * @param pg |
108 | * the optional progress reporter | |
08fe2e33 NR |
109 | * |
110 | * @return the content | |
111 | * | |
112 | * @throws IOException | |
113 | * in case of I/O error | |
114 | */ | |
0ffa4754 NR |
115 | protected abstract String getChapterContent(URL chapUrl, int number, |
116 | Progress pg) throws IOException; | |
6e06d2cc | 117 | |
08fe2e33 NR |
118 | /** |
119 | * Return the list of cookies (values included) that must be used to | |
120 | * correctly fetch the resources. | |
121 | * <p> | |
122 | * You are expected to call the super method implementation if you override | |
123 | * it. | |
124 | * | |
125 | * @return the cookies | |
126 | */ | |
315f14ae | 127 | public Map<String, String> getCookies() { |
08fe2e33 NR |
128 | return new HashMap<String, String>(); |
129 | } | |
130 | ||
315f14ae NR |
131 | /** |
132 | * OAuth authorisation (aka, "bearer XXXXXXX"). | |
133 | * | |
134 | * @return the OAuth string | |
135 | */ | |
136 | public String getOAuth() { | |
137 | return null; | |
138 | } | |
139 | ||
a4143cd7 NR |
140 | /** |
141 | * Return the canonical form of the main {@link URL}. | |
142 | * | |
143 | * @param source | |
0ffa4754 NR |
144 | * the source {@link URL}, which can be NULL |
145 | * | |
146 | * @return the canonical form of this {@link URL} or NULL if the source was | |
147 | * NULL | |
148 | */ | |
149 | protected URL getCanonicalUrl(URL source) { | |
150 | return source; | |
151 | } | |
152 | ||
153 | /** | |
154 | * The main {@link Node} for this {@link Story}. | |
155 | * | |
156 | * @return the node | |
157 | */ | |
158 | protected Element getSourceNode() { | |
159 | return sourceNode; | |
160 | } | |
161 | ||
162 | /** | |
163 | * The main {@link URL} for this {@link Story}. | |
164 | * | |
165 | * @return the URL | |
166 | */ | |
167 | protected URL getSource() { | |
168 | return source; | |
169 | } | |
170 | ||
171 | /** | |
172 | * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e., | |
173 | * the current {@link URL} we work on. | |
174 | * | |
175 | * @return the referer | |
176 | */ | |
177 | public URL getCurrentReferer() { | |
178 | return currentReferer; | |
179 | } | |
180 | ||
181 | /** | |
182 | * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e., | |
183 | * the current {@link URL} we work on. | |
184 | * | |
185 | * @param currentReferer | |
186 | * the new referer | |
187 | */ | |
188 | protected void setCurrentReferer(URL currentReferer) { | |
189 | this.currentReferer = currentReferer; | |
190 | } | |
191 | ||
192 | /** | |
193 | * The support type. | |
194 | * | |
195 | * @return the type | |
196 | */ | |
197 | public SupportType getType() { | |
198 | return type; | |
199 | } | |
200 | ||
201 | /** | |
202 | * The support type. | |
203 | * | |
204 | * @param type | |
205 | * the new type | |
206 | */ | |
207 | protected void setType(SupportType type) { | |
208 | this.type = type; | |
209 | } | |
210 | ||
211 | /** | |
212 | * Open an input link that will be used for the support. | |
213 | * <p> | |
7445f856 NR |
214 | * Can return NULL, in which case you are supposed to work without a source |
215 | * node. | |
0ffa4754 NR |
216 | * |
217 | * @param source | |
a4143cd7 NR |
218 | * the source {@link URL} |
219 | * | |
0ffa4754 NR |
220 | * @return the {@link InputStream} |
221 | * | |
222 | * @throws IOException | |
223 | * in case of I/O error | |
224 | */ | |
225 | protected Document loadDocument(URL source) throws IOException { | |
226 | String url = getCanonicalUrl(source).toString(); | |
d66deb8d | 227 | return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString()); |
0ffa4754 NR |
228 | } |
229 | ||
230 | /** | |
231 | * Log into the support (can be a no-op depending upon the support). | |
a4143cd7 NR |
232 | * |
233 | * @throws IOException | |
234 | * in case of I/O error | |
235 | */ | |
0ffa4754 NR |
236 | protected void login() throws IOException { |
237 | } | |
238 | ||
0ffa4754 NR |
239 | /** |
240 | * Now that we have processed the {@link Story}, close the resources if any. | |
241 | */ | |
242 | protected void close() { | |
243 | setCurrentReferer(null); | |
a4143cd7 NR |
244 | } |
245 | ||
08fe2e33 NR |
246 | /** |
247 | * Process the given story resource into a partially filled {@link Story} | |
248 | * object containing the name and metadata. | |
249 | * | |
0efd25e3 NR |
250 | * @param getDesc |
251 | * retrieve the description of the story, or not | |
ed08c171 NR |
252 | * @param pg |
253 | * the optional progress reporter | |
08fe2e33 | 254 | * |
776ad3c6 | 255 | * @return the {@link Story}, never NULL |
08fe2e33 NR |
256 | * |
257 | * @throws IOException | |
258 | * in case of I/O error | |
259 | */ | |
0ffa4754 NR |
260 | protected Story processMeta(boolean getDesc, Progress pg) |
261 | throws IOException { | |
ed08c171 NR |
262 | if (pg == null) { |
263 | pg = new Progress(); | |
264 | } else { | |
265 | pg.setMinMax(0, 100); | |
266 | } | |
267 | ||
0ffa4754 | 268 | pg.setProgress(30); |
ed08c171 | 269 | |
0ffa4754 NR |
270 | Story story = new Story(); |
271 | MetaData meta = getMeta(); | |
272 | if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) { | |
273 | meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); | |
274 | } | |
275 | story.setMeta(meta); | |
920af1c7 | 276 | pg.put("meta", meta); |
ed08c171 | 277 | |
0ffa4754 | 278 | pg.setProgress(50); |
08fe2e33 | 279 | |
0ffa4754 | 280 | if (meta.getCover() == null) { |
8d59ce07 | 281 | meta.setCover(bsHelper.getDefaultCover(meta.getSubject())); |
0ffa4754 | 282 | } |
08fe2e33 | 283 | |
0ffa4754 | 284 | pg.setProgress(60); |
a4143cd7 | 285 | |
0ffa4754 | 286 | if (getDesc) { |
d66deb8d NR |
287 | String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION); |
288 | story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, // | |
289 | getDesc(), isHtml(), null)); | |
08fe2e33 | 290 | } |
0ffa4754 | 291 | |
fdc55375 | 292 | pg.done(); |
0ffa4754 | 293 | return story; |
08fe2e33 NR |
294 | } |
295 | ||
9005532f | 296 | /** |
826e4569 NR |
297 | * Process the given story resource into a fully filled {@link Story} |
298 | * object. | |
9005532f NR |
299 | * |
300 | * @param pg | |
301 | * the optional progress reporter | |
302 | * | |
303 | * @return the {@link Story}, never NULL | |
304 | * | |
305 | * @throws IOException | |
306 | * in case of I/O error | |
307 | */ | |
6569afb4 | 308 | // TODO: ADD final when BasicSupport_Deprecated is gone |
9005532f NR |
309 | public Story process(Progress pg) throws IOException { |
310 | setCurrentReferer(source); | |
311 | login(); | |
312 | sourceNode = loadDocument(source); | |
313 | ||
314 | try { | |
315 | return doProcess(pg); | |
316 | } finally { | |
317 | close(); | |
318 | } | |
319 | } | |
320 | ||
08fe2e33 | 321 | /** |
826e4569 NR |
322 | * Actual processing step, without the calls to other methods. |
323 | * <p> | |
324 | * Will convert the story resource into a fully filled {@link Story} object. | |
08fe2e33 | 325 | * |
92fb0719 NR |
326 | * @param pg |
327 | * the optional progress reporter | |
08fe2e33 | 328 | * |
776ad3c6 | 329 | * @return the {@link Story}, never NULL |
08fe2e33 NR |
330 | * |
331 | * @throws IOException | |
332 | * in case of I/O error | |
333 | */ | |
826e4569 | 334 | protected Story doProcess(Progress pg) throws IOException { |
92fb0719 NR |
335 | if (pg == null) { |
336 | pg = new Progress(); | |
337 | } else { | |
338 | pg.setMinMax(0, 100); | |
339 | } | |
3b039231 NR |
340 | |
341 | pg.setName("Initialising"); | |
92fb0719 | 342 | |
92fb0719 | 343 | pg.setProgress(1); |
9005532f NR |
344 | Progress pgMeta = new Progress(); |
345 | pg.addProgress(pgMeta, 10); | |
346 | Story story = processMeta(true, pgMeta); | |
68328e17 | 347 | pgMeta.done(); // 10% |
920af1c7 | 348 | pg.put("meta", story.getMeta()); |
ed08c171 | 349 | |
9005532f NR |
350 | Progress pgGetChapters = new Progress(); |
351 | pg.addProgress(pgGetChapters, 10); | |
352 | story.setChapters(new ArrayList<Chapter>()); | |
353 | List<Entry<String, URL>> chapters = getChapters(pgGetChapters); | |
68328e17 | 354 | pgGetChapters.done(); // 20% |
9005532f NR |
355 | |
356 | if (chapters != null) { | |
357 | Progress pgChaps = new Progress("Extracting chapters", 0, | |
358 | chapters.size() * 300); | |
359 | pg.addProgress(pgChaps, 80); | |
360 | ||
361 | long words = 0; | |
362 | int i = 1; | |
363 | for (Entry<String, URL> chap : chapters) { | |
364 | pgChaps.setName("Extracting chapter " + i); | |
365 | URL chapUrl = chap.getValue(); | |
366 | String chapName = chap.getKey(); | |
367 | if (chapUrl != null) { | |
368 | setCurrentReferer(chapUrl); | |
369 | } | |
370 | ||
371 | pgChaps.setProgress(i * 100); | |
372 | Progress pgGetChapterContent = new Progress(); | |
373 | Progress pgMakeChapter = new Progress(); | |
374 | pgChaps.addProgress(pgGetChapterContent, 100); | |
375 | pgChaps.addProgress(pgMakeChapter, 100); | |
376 | ||
377 | String content = getChapterContent(chapUrl, i, | |
378 | pgGetChapterContent); | |
68328e17 | 379 | pgGetChapterContent.done(); |
8d59ce07 | 380 | Chapter cc = bsPara.makeChapter(this, chapUrl, i, |
9005532f | 381 | chapName, content, isHtml(), pgMakeChapter); |
68328e17 | 382 | pgMakeChapter.done(); |
ed08c171 | 383 | |
9005532f NR |
384 | words += cc.getWords(); |
385 | story.getChapters().add(cc); | |
386 | story.getMeta().setWords(words); | |
387 | ||
388 | i++; | |
08fe2e33 NR |
389 | } |
390 | ||
9005532f | 391 | pgChaps.setName("Extracting chapters"); |
fdc55375 | 392 | pgChaps.done(); |
08fe2e33 | 393 | } |
9005532f | 394 | |
68328e17 NR |
395 | pg.done(); |
396 | ||
9005532f | 397 | return story; |
08fe2e33 NR |
398 | } |
399 | ||
99d71bd7 NR |
400 | /** |
401 | * Create a chapter from the given data. | |
402 | * | |
403 | * @param source | |
404 | * the source URL for this content, which can be used to try and | |
405 | * find images if images are present in the format [image-url] | |
406 | * @param number | |
407 | * the chapter number (0 = description) | |
408 | * @param name | |
409 | * the chapter name | |
410 | * @param content | |
411 | * the content of the chapter | |
412 | * @return the {@link Chapter} | |
413 | * | |
414 | * @throws IOException | |
415 | * in case of I/O error | |
416 | */ | |
417 | public Chapter makeChapter(URL source, int number, String name, | |
418 | String content) throws IOException { | |
8d59ce07 | 419 | return bsPara.makeChapter(this, source, number, name, |
99d71bd7 NR |
420 | content, isHtml(), null); |
421 | } | |
422 | ||
08fe2e33 | 423 | /** |
0ffa4754 NR |
424 | * Return a {@link BasicSupport} implementation supporting the given |
425 | * resource if possible. | |
08fe2e33 | 426 | * |
0ffa4754 NR |
427 | * @param url |
428 | * the story resource | |
08fe2e33 | 429 | * |
0ffa4754 | 430 | * @return an implementation that supports it, or NULL |
08fe2e33 | 431 | */ |
0ffa4754 NR |
432 | public static BasicSupport getSupport(URL url) { |
433 | if (url == null) { | |
434 | return null; | |
435 | } | |
08fe2e33 | 436 | |
0ffa4754 NR |
437 | // TEXT and INFO_TEXT always support files (not URLs though) |
438 | for (SupportType type : SupportType.values()) { | |
439 | if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) { | |
440 | BasicSupport support = getSupport(type, url); | |
441 | if (support != null && support.supports(url)) { | |
442 | return support; | |
443 | } | |
444 | } | |
445 | } | |
08fe2e33 | 446 | |
0ffa4754 NR |
447 | for (SupportType type : new SupportType[] { SupportType.INFO_TEXT, |
448 | SupportType.TEXT }) { | |
449 | BasicSupport support = getSupport(type, url); | |
450 | if (support != null && support.supports(url)) { | |
451 | return support; | |
452 | } | |
453 | } | |
454 | ||
455 | return null; | |
08fe2e33 NR |
456 | } |
457 | ||
458 | /** | |
0ffa4754 | 459 | * Return a {@link BasicSupport} implementation supporting the given type. |
08fe2e33 | 460 | * |
0ffa4754 | 461 | * @param type |
99d71bd7 | 462 | * the type, must not be NULL |
0ffa4754 NR |
463 | * @param url |
464 | * the {@link URL} to support (can be NULL to get an | |
727108fe NR |
465 | * "abstract support"; if not NULL, will be used as the source |
466 | * URL) | |
08fe2e33 | 467 | * |
0ffa4754 | 468 | * @return an implementation that supports it, or NULL |
08fe2e33 | 469 | */ |
0ffa4754 NR |
470 | public static BasicSupport getSupport(SupportType type, URL url) { |
471 | BasicSupport support = null; | |
08fe2e33 | 472 | |
08fe2e33 NR |
473 | switch (type) { |
474 | case EPUB: | |
0ffa4754 NR |
475 | support = new Epub(); |
476 | break; | |
08fe2e33 | 477 | case INFO_TEXT: |
0ffa4754 NR |
478 | support = new InfoText(); |
479 | break; | |
08fe2e33 | 480 | case FIMFICTION: |
315f14ae NR |
481 | try { |
482 | // Can fail if no client key or NO in options | |
0ffa4754 | 483 | support = new FimfictionApi(); |
315f14ae | 484 | } catch (IOException e) { |
0ffa4754 | 485 | support = new Fimfiction(); |
315f14ae | 486 | } |
0ffa4754 | 487 | break; |
08fe2e33 | 488 | case FANFICTION: |
0ffa4754 NR |
489 | support = new Fanfiction(); |
490 | break; | |
08fe2e33 | 491 | case TEXT: |
0ffa4754 NR |
492 | support = new Text(); |
493 | break; | |
413bcc29 NR |
494 | case MANGAHUB: |
495 | support = new MangaHub(); | |
0ffa4754 | 496 | break; |
08fe2e33 | 497 | case E621: |
0ffa4754 NR |
498 | support = new E621(); |
499 | break; | |
a4143cd7 | 500 | case YIFFSTAR: |
0ffa4754 NR |
501 | support = new YiffStar(); |
502 | break; | |
f0608ab1 | 503 | case E_HENTAI: |
0ffa4754 NR |
504 | support = new EHentai(); |
505 | break; | |
af1f506f NR |
506 | case MANGA_LEL: |
507 | support = new MangaLel(); | |
508 | break; | |
08fe2e33 | 509 | case CBZ: |
0ffa4754 NR |
510 | support = new Cbz(); |
511 | break; | |
373da363 | 512 | case HTML: |
0ffa4754 NR |
513 | support = new Html(); |
514 | break; | |
68686a37 NR |
515 | } |
516 | ||
0ffa4754 NR |
517 | if (support != null) { |
518 | support.setType(type); | |
519 | support.source = support.getCanonicalUrl(url); | |
315f14ae NR |
520 | } |
521 | ||
0ffa4754 | 522 | return support; |
315f14ae | 523 | } |
08fe2e33 | 524 | } |