code cleanup / jdoc
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.ArrayList;
8 import java.util.Date;
9 import java.util.HashMap;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Scanner;
13 import java.util.Map.Entry;
14
15 import org.json.JSONException;
16 import org.json.JSONObject;
17 import org.jsoup.helper.DataUtil;
18 import org.jsoup.nodes.Document;
19 import org.jsoup.nodes.Element;
20 import org.jsoup.nodes.Node;
21
22 import be.nikiroo.fanfix.Instance;
23 import be.nikiroo.fanfix.bundles.StringId;
24 import be.nikiroo.fanfix.data.Chapter;
25 import be.nikiroo.fanfix.data.MetaData;
26 import be.nikiroo.fanfix.data.Story;
27 import be.nikiroo.utils.Progress;
28 import be.nikiroo.utils.StringUtils;
29
30 /**
31 * This class is the base class used by the other support classes. It can be
32 * used outside of this package, and have static method that you can use to get
33 * access to the correct support class.
34 * <p>
35 * It will be used with 'resources' (usually web pages or files).
36 *
37 * @author niki
38 */
39 public abstract class BasicSupport {
40 private Document sourceNode;
41 private URL source;
42 private SupportType type;
43 private URL currentReferer; // with only one 'r', as in 'HTTP'...
44
45 static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
46 static protected BasicSupportImages bsImages = new BasicSupportImages();
47 static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
48
49 /**
50 * Check if the given resource is supported by this {@link BasicSupport}.
51 *
52 * @param url
53 * the resource to check for
54 *
55 * @return TRUE if it is
56 */
57 protected abstract boolean supports(URL url);
58
59 /**
60 * Return TRUE if the support will return HTML encoded content values for
61 * the chapters content.
62 *
63 * @return TRUE for HTML
64 */
65 protected abstract boolean isHtml();
66
67 /**
68 * Return the {@link MetaData} of this story.
69 *
70 * @return the associated {@link MetaData}, never NULL
71 *
72 * @throws IOException
73 * in case of I/O error
74 */
75 protected abstract MetaData getMeta() throws IOException;
76
77 /**
78 * Return the story description.
79 *
80 * @return the description
81 *
82 * @throws IOException
83 * in case of I/O error
84 */
85 protected abstract String getDesc() throws IOException;
86
87 /**
88 * Return the list of chapters (name and resource).
89 * <p>
90 * Can be NULL if this {@link BasicSupport} do no use chapters.
91 *
92 * @param pg
93 * the optional progress reporter
94 *
95 * @return the chapters or NULL
96 *
97 * @throws IOException
98 * in case of I/O error
99 */
100 protected abstract List<Entry<String, URL>> getChapters(Progress pg)
101 throws IOException;
102
103 /**
104 * Return the content of the chapter (possibly HTML encoded, if
105 * {@link BasicSupport#isHtml()} is TRUE).
106 *
107 * @param chapUrl
108 * the chapter {@link URL}
109 * @param number
110 * the chapter number
111 * @param pg
112 * the optional progress reporter
113 *
114 * @return the content
115 *
116 * @throws IOException
117 * in case of I/O error
118 */
119 protected abstract String getChapterContent(URL chapUrl, int number,
120 Progress pg) throws IOException;
121
122 /**
123 * Return the list of cookies (values included) that must be used to
124 * correctly fetch the resources.
125 * <p>
126 * You are expected to call the super method implementation if you override
127 * it.
128 *
129 * @return the cookies
130 */
131 public Map<String, String> getCookies() {
132 return new HashMap<String, String>();
133 }
134
135 /**
136 * OAuth authorisation (aka, "bearer XXXXXXX").
137 *
138 * @return the OAuth string
139 */
140 public String getOAuth() {
141 return null;
142 }
143
144 /**
145 * Return the canonical form of the main {@link URL}.
146 *
147 * @param source
148 * the source {@link URL}, which can be NULL
149 *
150 * @return the canonical form of this {@link URL} or NULL if the source was
151 * NULL
152 */
153 protected URL getCanonicalUrl(URL source) {
154 return source;
155 }
156
157 /**
158 * The main {@link Node} for this {@link Story}.
159 *
160 * @return the node
161 */
162 protected Element getSourceNode() {
163 return sourceNode;
164 }
165
166 /**
167 * The main {@link URL} for this {@link Story}.
168 *
169 * @return the URL
170 */
171 protected URL getSource() {
172 return source;
173 }
174
175 /**
176 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
177 * the current {@link URL} we work on.
178 *
179 * @return the referer
180 */
181 public URL getCurrentReferer() {
182 return currentReferer;
183 }
184
185 /**
186 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
187 * the current {@link URL} we work on.
188 *
189 * @param currentReferer
190 * the new referer
191 */
192 protected void setCurrentReferer(URL currentReferer) {
193 this.currentReferer = currentReferer;
194 }
195
196 /**
197 * The support type.
198 *
199 * @return the type
200 */
201 public SupportType getType() {
202 return type;
203 }
204
205 /**
206 * The support type.
207 *
208 * @param type
209 * the new type
210 */
211 protected void setType(SupportType type) {
212 this.type = type;
213 }
214
215 /**
216 * Open an input link that will be used for the support.
217 * <p>
218 * Can return NULL, in which case you are supposed to work without a source
219 * node.
220 *
221 * @param source
222 * the source {@link URL}
223 *
224 * @return the {@link InputStream}
225 *
226 * @throws IOException
227 * in case of I/O error
228 */
229 protected Document loadDocument(URL source) throws IOException {
230 String url = getCanonicalUrl(source).toString();
231 return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
232 }
233
234 /**
235 * Log into the support (can be a no-op depending upon the support).
236 *
237 * @throws IOException
238 * in case of I/O error
239 */
240 protected void login() throws IOException {
241 }
242
243 /**
244 * Now that we have processed the {@link Story}, close the resources if any.
245 */
246 protected void close() {
247 setCurrentReferer(null);
248 }
249
250 /**
251 * Process the given story resource into a partially filled {@link Story}
252 * object containing the name and metadata.
253 *
254 * @param getDesc
255 * retrieve the description of the story, or not
256 * @param pg
257 * the optional progress reporter
258 *
259 * @return the {@link Story}, never NULL
260 *
261 * @throws IOException
262 * in case of I/O error
263 */
264 protected Story processMeta(boolean getDesc, Progress pg)
265 throws IOException {
266 if (pg == null) {
267 pg = new Progress();
268 } else {
269 pg.setMinMax(0, 100);
270 }
271
272 pg.setProgress(30);
273
274 Story story = new Story();
275
276 MetaData meta = getMeta();
277 meta.setType(getType().toString());
278 meta.setSource(getType().getSourceName());
279 meta.setPublisher(getType().getSourceName());
280
281 if (meta.getCreationDate() == null
282 || meta.getCreationDate().trim().isEmpty()) {
283 meta.setCreationDate(bsHelper
284 .formatDate(StringUtils.fromTime(new Date().getTime())));
285 }
286 story.setMeta(meta);
287 pg.put("meta", meta);
288
289 pg.setProgress(50);
290
291 if (meta.getCover() == null) {
292 meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
293 }
294
295 pg.setProgress(60);
296
297 if (getDesc) {
298 String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
299 story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
300 getDesc(), isHtml(), null));
301 }
302
303 pg.done();
304 return story;
305 }
306
307 /**
308 * Utility method to convert the given URL into a JSON object.
309 * <p>
310 * Note that this method expects small JSON files (everything is copied into
311 * memory at least twice).
312 *
313 * @param url
314 * the URL to parse
315 * @param stable
316 * TRUE for more stable resources, FALSE when they often change
317 *
318 * @return the JSON object
319 *
320 * @throws IOException
321 * in case of I/O error
322 */
323 protected JSONObject getJson(String url, boolean stable)
324 throws IOException {
325 try {
326 return getJson(new URL(url), stable);
327 } catch (MalformedURLException e) {
328 throw new IOException("Malformed URL: " + url, e);
329 }
330 }
331
332 /**
333 * Utility method to convert the given URL into a JSON object.
334 * <p>
335 * Note that this method expects small JSON files (everything is copied into
336 * memory at least twice).
337 *
338 * @param url
339 * the URL to parse
340 * @param stable
341 * TRUE for more stable resources, FALSE when they often change
342 *
343 * @return the JSON object
344 *
345 * @throws IOException
346 * in case of I/O error
347 */
348 protected JSONObject getJson(URL url, boolean stable) throws IOException {
349 InputStream in = Instance.getInstance().getCache().open(url, null,
350 stable);
351 try {
352 Scanner scan = new Scanner(in);
353 scan.useDelimiter("\0");
354 try {
355 return new JSONObject(scan.next());
356 } catch (JSONException e) {
357 throw new IOException(e);
358 } finally {
359 scan.close();
360 }
361 } finally {
362 in.close();
363 }
364 }
365
366 /**
367 * Process the given story resource into a fully filled {@link Story}
368 * object.
369 *
370 * @param pg
371 * the optional progress reporter
372 *
373 * @return the {@link Story}, never NULL
374 *
375 * @throws IOException
376 * in case of I/O error
377 */
378 // TODO: ADD final when BasicSupport_Deprecated is gone
379 public Story process(Progress pg) throws IOException {
380 setCurrentReferer(source);
381 login();
382 sourceNode = loadDocument(source);
383
384 try {
385 Story story = doProcess(pg);
386
387 // Check for "no chapters" stories
388 if (story.getChapters().isEmpty()
389 && story.getMeta().getResume() != null
390 && !story.getMeta().getResume().getParagraphs().isEmpty()) {
391 Chapter resume = story.getMeta().getResume();
392 resume.setName("");
393 resume.setNumber(1);
394 story.getChapters().add(resume);
395 story.getMeta().setWords(resume.getWords());
396
397 String descChapterName = Instance.getInstance().getTrans()
398 .getString(StringId.DESCRIPTION);
399 resume = new Chapter(0, descChapterName);
400 story.getMeta().setResume(resume);
401 }
402
403 return story;
404 } finally {
405 close();
406 }
407 }
408
409 /**
410 * Actual processing step, without the calls to other methods.
411 * <p>
412 * Will convert the story resource into a fully filled {@link Story} object.
413 *
414 * @param pg
415 * the optional progress reporter
416 *
417 * @return the {@link Story}, never NULL
418 *
419 * @throws IOException
420 * in case of I/O error
421 */
422 protected Story doProcess(Progress pg) throws IOException {
423 if (pg == null) {
424 pg = new Progress();
425 } else {
426 pg.setMinMax(0, 100);
427 }
428
429 pg.setName("Initialising");
430
431 pg.setProgress(1);
432 Progress pgMeta = new Progress();
433 pg.addProgress(pgMeta, 10);
434 Story story = processMeta(true, pgMeta);
435 pgMeta.done(); // 10%
436 pg.put("meta", story.getMeta());
437
438 Progress pgGetChapters = new Progress();
439 pg.addProgress(pgGetChapters, 10);
440 story.setChapters(new ArrayList<Chapter>());
441 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
442 pgGetChapters.done(); // 20%
443
444 if (chapters != null) {
445 Progress pgChaps = new Progress("Extracting chapters", 0,
446 chapters.size() * 300);
447 pg.addProgress(pgChaps, 80);
448
449 long words = 0;
450 int i = 1;
451 for (Entry<String, URL> chap : chapters) {
452 pgChaps.setName("Extracting chapter " + i);
453 URL chapUrl = chap.getValue();
454 String chapName = chap.getKey();
455 if (chapUrl != null) {
456 setCurrentReferer(chapUrl);
457 }
458
459 pgChaps.setProgress(i * 100);
460 Progress pgGetChapterContent = new Progress();
461 Progress pgMakeChapter = new Progress();
462 pgChaps.addProgress(pgGetChapterContent, 100);
463 pgChaps.addProgress(pgMakeChapter, 100);
464
465 String content = getChapterContent(chapUrl, i,
466 pgGetChapterContent);
467 pgGetChapterContent.done();
468 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
469 chapName, content, isHtml(), pgMakeChapter);
470 pgMakeChapter.done();
471
472 words += cc.getWords();
473 story.getChapters().add(cc);
474
475 i++;
476 }
477
478 story.getMeta().setWords(words);
479
480 pgChaps.setName("Extracting chapters");
481 pgChaps.done();
482 }
483
484 pg.done();
485
486 return story;
487 }
488
489 /**
490 * Create a chapter from the given data.
491 *
492 * @param source
493 * the source URL for this content, which can be used to try and
494 * find images if images are present in the format [image-url]
495 * @param number
496 * the chapter number (0 = description)
497 * @param name
498 * the chapter name
499 * @param content
500 * the content of the chapter
501 *
502 * @return the {@link Chapter}, never NULL
503 *
504 * @throws IOException
505 * in case of I/O error
506 */
507 public Chapter makeChapter(URL source, int number, String name,
508 String content) throws IOException {
509 return bsPara.makeChapter(this, source, number, name,
510 content, isHtml(), null);
511 }
512
513 /**
514 * Return a {@link BasicSupport} implementation supporting the given
515 * resource if possible.
516 *
517 * @param url
518 * the story resource
519 *
520 * @return an implementation that supports it, or NULL
521 */
522 public static BasicSupport getSupport(URL url) {
523 if (url == null) {
524 return null;
525 }
526
527 // TEXT and INFO_TEXT always support files (not URLs though)
528 for (SupportType type : SupportType.values()) {
529 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
530 BasicSupport support = getSupport(type, url);
531 if (support != null && support.supports(url)) {
532 return support;
533 }
534 }
535 }
536
537 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
538 SupportType.TEXT }) {
539 BasicSupport support = getSupport(type, url);
540 if (support != null && support.supports(url)) {
541 return support;
542 }
543 }
544
545 return null;
546 }
547
548 /**
549 * Return a {@link BasicSupport} implementation supporting the given type.
550 *
551 * @param type
552 * the type, must not be NULL
553 * @param url
554 * the {@link URL} to support (can be NULL to get an
555 * "abstract support"; if not NULL, will be used as the source
556 * URL)
557 *
558 * @return an implementation that supports it, or NULL
559 */
560 public static BasicSupport getSupport(SupportType type, URL url) {
561 BasicSupport support = null;
562
563 switch (type) {
564 case EPUB:
565 support = new Epub();
566 break;
567 case INFO_TEXT:
568 support = new InfoText();
569 break;
570 case FIMFICTION:
571 try {
572 // Can fail if no client key or NO in options
573 support = new FimfictionApi();
574 } catch (IOException e) {
575 support = new Fimfiction();
576 }
577 break;
578 case FANFICTION:
579 support = new Fanfiction();
580 break;
581 case TEXT:
582 support = new Text();
583 break;
584 case MANGAHUB:
585 support = new MangaHub();
586 break;
587 case E621:
588 support = new E621();
589 break;
590 case YIFFSTAR:
591 support = new YiffStar();
592 break;
593 case E_HENTAI:
594 support = new EHentai();
595 break;
596 case MANGA_LEL:
597 support = new MangaLel();
598 break;
599 case CBZ:
600 support = new Cbz();
601 break;
602 case HTML:
603 support = new Html();
604 break;
605 }
606
607 if (support != null) {
608 support.setType(type);
609 support.source = support.getCanonicalUrl(url);
610 }
611
612 return support;
613 }
614 }