use more template, use replace input stream
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
08fe2e33
NR
3import java.io.IOException;
4import java.io.InputStream;
5cf61f35 5import java.net.MalformedURLException;
08fe2e33 6import java.net.URL;
08fe2e33 7import java.util.ArrayList;
793f1071 8import java.util.Date;
08fe2e33
NR
9import java.util.HashMap;
10import java.util.List;
11import java.util.Map;
5cf61f35 12import java.util.Scanner;
08fe2e33 13import java.util.Map.Entry;
0ffa4754 14
8831d290 15import org.json.JSONException;
5cf61f35 16import org.json.JSONObject;
0ffa4754
NR
17import org.jsoup.helper.DataUtil;
18import org.jsoup.nodes.Document;
19import org.jsoup.nodes.Element;
20import org.jsoup.nodes.Node;
08fe2e33
NR
21
22import be.nikiroo.fanfix.Instance;
08fe2e33
NR
23import be.nikiroo.fanfix.bundles.StringId;
24import be.nikiroo.fanfix.data.Chapter;
25import be.nikiroo.fanfix.data.MetaData;
9252c65e 26import be.nikiroo.fanfix.data.Story;
3b2b638f 27import be.nikiroo.utils.Progress;
08fe2e33
NR
28import be.nikiroo.utils.StringUtils;
29
30/**
31 * This class is the base class used by the other support classes. It can be
32 * used outside of this package, and have static method that you can use to get
33 * access to the correct support class.
34 * <p>
35 * It will be used with 'resources' (usually web pages or files).
36 *
37 * @author niki
38 */
39public abstract class BasicSupport {
0ffa4754
NR
40 private Document sourceNode;
41 private URL source;
08fe2e33 42 private SupportType type;
22848428 43 private URL currentReferer; // with only one 'r', as in 'HTTP'...
8d59ce07
NR
44
45 static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
46 static protected BasicSupportImages bsImages = new BasicSupportImages();
47 static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
08fe2e33 48
08fe2e33
NR
49 /**
50 * Check if the given resource is supported by this {@link BasicSupport}.
51 *
52 * @param url
53 * the resource to check for
54 *
55 * @return TRUE if it is
56 */
57 protected abstract boolean supports(URL url);
58
59 /**
60 * Return TRUE if the support will return HTML encoded content values for
61 * the chapters content.
62 *
63 * @return TRUE for HTML
64 */
65 protected abstract boolean isHtml();
66
0efd25e3
NR
67 /**
68 * Return the {@link MetaData} of this story.
69 *
776ad3c6 70 * @return the associated {@link MetaData}, never NULL
0efd25e3
NR
71 *
72 * @throws IOException
73 * in case of I/O error
74 */
0ffa4754 75 protected abstract MetaData getMeta() throws IOException;
08fe2e33
NR
76
77 /**
78 * Return the story description.
79 *
08fe2e33
NR
80 * @return the description
81 *
82 * @throws IOException
83 * in case of I/O error
84 */
0ffa4754 85 protected abstract String getDesc() throws IOException;
08fe2e33 86
08fe2e33 87 /**
826e4569 88 * Return the list of chapters (name and resource).
0ffa4754
NR
89 * <p>
90 * Can be NULL if this {@link BasicSupport} do no use chapters.
08fe2e33 91 *
ed08c171
NR
92 * @param pg
93 * the optional progress reporter
08fe2e33 94 *
0ffa4754 95 * @return the chapters or NULL
08fe2e33
NR
96 *
97 * @throws IOException
98 * in case of I/O error
99 */
0ffa4754
NR
100 protected abstract List<Entry<String, URL>> getChapters(Progress pg)
101 throws IOException;
08fe2e33
NR
102
103 /**
104 * Return the content of the chapter (possibly HTML encoded, if
105 * {@link BasicSupport#isHtml()} is TRUE).
106 *
0ffa4754
NR
107 * @param chapUrl
108 * the chapter {@link URL}
08fe2e33
NR
109 * @param number
110 * the chapter number
ed08c171
NR
111 * @param pg
112 * the optional progress reporter
08fe2e33
NR
113 *
114 * @return the content
115 *
116 * @throws IOException
117 * in case of I/O error
118 */
0ffa4754
NR
119 protected abstract String getChapterContent(URL chapUrl, int number,
120 Progress pg) throws IOException;
6e06d2cc 121
08fe2e33
NR
122 /**
123 * Return the list of cookies (values included) that must be used to
124 * correctly fetch the resources.
125 * <p>
126 * You are expected to call the super method implementation if you override
127 * it.
128 *
129 * @return the cookies
130 */
315f14ae 131 public Map<String, String> getCookies() {
08fe2e33
NR
132 return new HashMap<String, String>();
133 }
134
315f14ae
NR
135 /**
136 * OAuth authorisation (aka, "bearer XXXXXXX").
137 *
138 * @return the OAuth string
139 */
140 public String getOAuth() {
141 return null;
142 }
143
a4143cd7
NR
144 /**
145 * Return the canonical form of the main {@link URL}.
146 *
147 * @param source
0ffa4754
NR
148 * the source {@link URL}, which can be NULL
149 *
150 * @return the canonical form of this {@link URL} or NULL if the source was
151 * NULL
152 */
153 protected URL getCanonicalUrl(URL source) {
154 return source;
155 }
156
157 /**
158 * The main {@link Node} for this {@link Story}.
159 *
160 * @return the node
161 */
162 protected Element getSourceNode() {
163 return sourceNode;
164 }
165
166 /**
167 * The main {@link URL} for this {@link Story}.
168 *
169 * @return the URL
170 */
171 protected URL getSource() {
172 return source;
173 }
174
175 /**
176 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
177 * the current {@link URL} we work on.
178 *
179 * @return the referer
180 */
181 public URL getCurrentReferer() {
182 return currentReferer;
183 }
184
185 /**
186 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
187 * the current {@link URL} we work on.
188 *
189 * @param currentReferer
190 * the new referer
191 */
192 protected void setCurrentReferer(URL currentReferer) {
193 this.currentReferer = currentReferer;
194 }
195
196 /**
197 * The support type.
198 *
199 * @return the type
200 */
201 public SupportType getType() {
202 return type;
203 }
204
205 /**
206 * The support type.
207 *
208 * @param type
209 * the new type
210 */
211 protected void setType(SupportType type) {
212 this.type = type;
213 }
214
215 /**
216 * Open an input link that will be used for the support.
217 * <p>
7445f856
NR
218 * Can return NULL, in which case you are supposed to work without a source
219 * node.
0ffa4754
NR
220 *
221 * @param source
a4143cd7
NR
222 * the source {@link URL}
223 *
0ffa4754
NR
224 * @return the {@link InputStream}
225 *
226 * @throws IOException
227 * in case of I/O error
228 */
229 protected Document loadDocument(URL source) throws IOException {
230 String url = getCanonicalUrl(source).toString();
d66deb8d 231 return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
0ffa4754
NR
232 }
233
234 /**
235 * Log into the support (can be a no-op depending upon the support).
a4143cd7
NR
236 *
237 * @throws IOException
238 * in case of I/O error
239 */
0ffa4754
NR
240 protected void login() throws IOException {
241 }
242
0ffa4754
NR
243 /**
244 * Now that we have processed the {@link Story}, close the resources if any.
245 */
246 protected void close() {
247 setCurrentReferer(null);
a4143cd7
NR
248 }
249
08fe2e33
NR
250 /**
251 * Process the given story resource into a partially filled {@link Story}
252 * object containing the name and metadata.
253 *
0efd25e3
NR
254 * @param getDesc
255 * retrieve the description of the story, or not
ed08c171
NR
256 * @param pg
257 * the optional progress reporter
08fe2e33 258 *
776ad3c6 259 * @return the {@link Story}, never NULL
08fe2e33
NR
260 *
261 * @throws IOException
262 * in case of I/O error
263 */
0ffa4754
NR
264 protected Story processMeta(boolean getDesc, Progress pg)
265 throws IOException {
ed08c171
NR
266 if (pg == null) {
267 pg = new Progress();
268 } else {
269 pg.setMinMax(0, 100);
270 }
271
0ffa4754 272 pg.setProgress(30);
ed08c171 273
0ffa4754
NR
274 Story story = new Story();
275 MetaData meta = getMeta();
bff19b54
NR
276 if (meta.getCreationDate() == null
277 || meta.getCreationDate().trim().isEmpty()) {
278 meta.setCreationDate(bsHelper
279 .formatDate(StringUtils.fromTime(new Date().getTime())));
0ffa4754
NR
280 }
281 story.setMeta(meta);
920af1c7 282 pg.put("meta", meta);
ed08c171 283
0ffa4754 284 pg.setProgress(50);
08fe2e33 285
0ffa4754 286 if (meta.getCover() == null) {
8d59ce07 287 meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
0ffa4754 288 }
08fe2e33 289
0ffa4754 290 pg.setProgress(60);
a4143cd7 291
0ffa4754 292 if (getDesc) {
d66deb8d
NR
293 String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
294 story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
295 getDesc(), isHtml(), null));
08fe2e33 296 }
0ffa4754 297
fdc55375 298 pg.done();
0ffa4754 299 return story;
08fe2e33
NR
300 }
301
5cf61f35
NR
302 /**
303 * Utility method to convert the given URL into a JSON object.
304 * <p>
305 * Note that this method expects small JSON files (everything is copied into
306 * memory at least twice).
307 *
308 * @param url
309 * the URL to parse
310 * @param stable
311 * TRUE for more stable resources, FALSE when they often change
312 *
313 * @return the JSON object
314 *
315 * @throws IOException
316 * in case of I/O error
317 */
318 protected JSONObject getJson(String url, boolean stable)
319 throws IOException {
320 try {
321 return getJson(new URL(url), stable);
322 } catch (MalformedURLException e) {
323 throw new IOException("Malformed URL: " + url, e);
324 }
325 }
326
327 /**
328 * Utility method to convert the given URL into a JSON object.
329 * <p>
330 * Note that this method expects small JSON files (everything is copied into
331 * memory at least twice).
332 *
333 * @param url
334 * the URL to parse
335 * @param stable
336 * TRUE for more stable resources, FALSE when they often change
337 *
338 * @return the JSON object
339 *
340 * @throws IOException
341 * in case of I/O error
342 */
343 protected JSONObject getJson(URL url, boolean stable) throws IOException {
344 InputStream in = Instance.getInstance().getCache().open(url, null,
345 stable);
346 try {
347 Scanner scan = new Scanner(in);
348 scan.useDelimiter("\0");
349 try {
350 return new JSONObject(scan.next());
8831d290
NR
351 } catch (JSONException e) {
352 throw new IOException(e);
5cf61f35
NR
353 } finally {
354 scan.close();
355 }
356 } finally {
357 in.close();
358 }
359 }
360
9005532f 361 /**
826e4569
NR
362 * Process the given story resource into a fully filled {@link Story}
363 * object.
9005532f
NR
364 *
365 * @param pg
366 * the optional progress reporter
367 *
368 * @return the {@link Story}, never NULL
369 *
370 * @throws IOException
371 * in case of I/O error
372 */
6569afb4 373 // TODO: ADD final when BasicSupport_Deprecated is gone
9005532f
NR
374 public Story process(Progress pg) throws IOException {
375 setCurrentReferer(source);
376 login();
377 sourceNode = loadDocument(source);
378
379 try {
75a6a3ea
NR
380 Story story = doProcess(pg);
381
382 // Check for "no chapters" stories
383 if (story.getChapters().isEmpty()
384 && story.getMeta().getResume() != null
385 && !story.getMeta().getResume().getParagraphs().isEmpty()) {
386 Chapter resume = story.getMeta().getResume();
387 resume.setName("");
388 resume.setNumber(1);
389 story.getChapters().add(resume);
5d190880 390 story.getMeta().setWords(resume.getWords());
75a6a3ea
NR
391
392 String descChapterName = Instance.getInstance().getTrans()
393 .getString(StringId.DESCRIPTION);
394 resume = new Chapter(0, descChapterName);
395 story.getMeta().setResume(resume);
396 }
397
398 return story;
9005532f
NR
399 } finally {
400 close();
401 }
402 }
403
08fe2e33 404 /**
826e4569
NR
405 * Actual processing step, without the calls to other methods.
406 * <p>
407 * Will convert the story resource into a fully filled {@link Story} object.
08fe2e33 408 *
92fb0719
NR
409 * @param pg
410 * the optional progress reporter
08fe2e33 411 *
776ad3c6 412 * @return the {@link Story}, never NULL
08fe2e33
NR
413 *
414 * @throws IOException
415 * in case of I/O error
416 */
826e4569 417 protected Story doProcess(Progress pg) throws IOException {
92fb0719
NR
418 if (pg == null) {
419 pg = new Progress();
420 } else {
421 pg.setMinMax(0, 100);
422 }
3b039231
NR
423
424 pg.setName("Initialising");
92fb0719 425
92fb0719 426 pg.setProgress(1);
9005532f
NR
427 Progress pgMeta = new Progress();
428 pg.addProgress(pgMeta, 10);
429 Story story = processMeta(true, pgMeta);
68328e17 430 pgMeta.done(); // 10%
920af1c7 431 pg.put("meta", story.getMeta());
ed08c171 432
9005532f
NR
433 Progress pgGetChapters = new Progress();
434 pg.addProgress(pgGetChapters, 10);
435 story.setChapters(new ArrayList<Chapter>());
436 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
68328e17 437 pgGetChapters.done(); // 20%
5d190880 438
9005532f
NR
439 if (chapters != null) {
440 Progress pgChaps = new Progress("Extracting chapters", 0,
441 chapters.size() * 300);
442 pg.addProgress(pgChaps, 80);
443
444 long words = 0;
445 int i = 1;
446 for (Entry<String, URL> chap : chapters) {
447 pgChaps.setName("Extracting chapter " + i);
448 URL chapUrl = chap.getValue();
449 String chapName = chap.getKey();
450 if (chapUrl != null) {
451 setCurrentReferer(chapUrl);
452 }
453
454 pgChaps.setProgress(i * 100);
455 Progress pgGetChapterContent = new Progress();
456 Progress pgMakeChapter = new Progress();
457 pgChaps.addProgress(pgGetChapterContent, 100);
458 pgChaps.addProgress(pgMakeChapter, 100);
459
460 String content = getChapterContent(chapUrl, i,
461 pgGetChapterContent);
68328e17 462 pgGetChapterContent.done();
8d59ce07 463 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
9005532f 464 chapName, content, isHtml(), pgMakeChapter);
68328e17 465 pgMakeChapter.done();
ed08c171 466
9005532f
NR
467 words += cc.getWords();
468 story.getChapters().add(cc);
9005532f
NR
469
470 i++;
08fe2e33 471 }
5d190880
NR
472
473 story.getMeta().setWords(words);
08fe2e33 474
9005532f 475 pgChaps.setName("Extracting chapters");
fdc55375 476 pgChaps.done();
08fe2e33 477 }
9005532f 478
68328e17
NR
479 pg.done();
480
9005532f 481 return story;
08fe2e33
NR
482 }
483
99d71bd7
NR
484 /**
485 * Create a chapter from the given data.
486 *
487 * @param source
488 * the source URL for this content, which can be used to try and
489 * find images if images are present in the format [image-url]
490 * @param number
491 * the chapter number (0 = description)
492 * @param name
493 * the chapter name
494 * @param content
495 * the content of the chapter
75a6a3ea
NR
496 *
497 * @return the {@link Chapter}, never NULL
99d71bd7
NR
498 *
499 * @throws IOException
500 * in case of I/O error
501 */
502 public Chapter makeChapter(URL source, int number, String name,
503 String content) throws IOException {
8d59ce07 504 return bsPara.makeChapter(this, source, number, name,
99d71bd7
NR
505 content, isHtml(), null);
506 }
507
08fe2e33 508 /**
0ffa4754
NR
509 * Return a {@link BasicSupport} implementation supporting the given
510 * resource if possible.
08fe2e33 511 *
0ffa4754
NR
512 * @param url
513 * the story resource
08fe2e33 514 *
0ffa4754 515 * @return an implementation that supports it, or NULL
08fe2e33 516 */
0ffa4754
NR
517 public static BasicSupport getSupport(URL url) {
518 if (url == null) {
519 return null;
520 }
08fe2e33 521
0ffa4754
NR
522 // TEXT and INFO_TEXT always support files (not URLs though)
523 for (SupportType type : SupportType.values()) {
524 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
525 BasicSupport support = getSupport(type, url);
526 if (support != null && support.supports(url)) {
527 return support;
528 }
529 }
530 }
08fe2e33 531
0ffa4754
NR
532 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
533 SupportType.TEXT }) {
534 BasicSupport support = getSupport(type, url);
535 if (support != null && support.supports(url)) {
536 return support;
537 }
538 }
539
540 return null;
08fe2e33
NR
541 }
542
543 /**
0ffa4754 544 * Return a {@link BasicSupport} implementation supporting the given type.
08fe2e33 545 *
0ffa4754 546 * @param type
99d71bd7 547 * the type, must not be NULL
0ffa4754
NR
548 * @param url
549 * the {@link URL} to support (can be NULL to get an
727108fe
NR
550 * "abstract support"; if not NULL, will be used as the source
551 * URL)
08fe2e33 552 *
0ffa4754 553 * @return an implementation that supports it, or NULL
08fe2e33 554 */
0ffa4754
NR
555 public static BasicSupport getSupport(SupportType type, URL url) {
556 BasicSupport support = null;
08fe2e33 557
08fe2e33
NR
558 switch (type) {
559 case EPUB:
0ffa4754
NR
560 support = new Epub();
561 break;
08fe2e33 562 case INFO_TEXT:
0ffa4754
NR
563 support = new InfoText();
564 break;
08fe2e33 565 case FIMFICTION:
315f14ae
NR
566 try {
567 // Can fail if no client key or NO in options
0ffa4754 568 support = new FimfictionApi();
315f14ae 569 } catch (IOException e) {
0ffa4754 570 support = new Fimfiction();
315f14ae 571 }
0ffa4754 572 break;
08fe2e33 573 case FANFICTION:
0ffa4754
NR
574 support = new Fanfiction();
575 break;
08fe2e33 576 case TEXT:
0ffa4754
NR
577 support = new Text();
578 break;
413bcc29
NR
579 case MANGAHUB:
580 support = new MangaHub();
0ffa4754 581 break;
08fe2e33 582 case E621:
0ffa4754
NR
583 support = new E621();
584 break;
a4143cd7 585 case YIFFSTAR:
0ffa4754
NR
586 support = new YiffStar();
587 break;
f0608ab1 588 case E_HENTAI:
0ffa4754
NR
589 support = new EHentai();
590 break;
af1f506f
NR
591 case MANGA_LEL:
592 support = new MangaLel();
593 break;
08fe2e33 594 case CBZ:
0ffa4754
NR
595 support = new Cbz();
596 break;
373da363 597 case HTML:
0ffa4754
NR
598 support = new Html();
599 break;
68686a37
NR
600 }
601
0ffa4754
NR
602 if (support != null) {
603 support.setType(type);
604 support.source = support.getCanonicalUrl(url);
315f14ae
NR
605 }
606
0ffa4754 607 return support;
315f14ae 608 }
08fe2e33 609}