e621: use api key
[nikiroo-utils.git] / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.ArrayList;
8 import java.util.Date;
9 import java.util.HashMap;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Scanner;
13 import java.util.Map.Entry;
14
15 import org.json.JSONObject;
16 import org.jsoup.helper.DataUtil;
17 import org.jsoup.nodes.Document;
18 import org.jsoup.nodes.Element;
19 import org.jsoup.nodes.Node;
20
21 import be.nikiroo.fanfix.Instance;
22 import be.nikiroo.fanfix.bundles.StringId;
23 import be.nikiroo.fanfix.data.Chapter;
24 import be.nikiroo.fanfix.data.MetaData;
25 import be.nikiroo.fanfix.data.Story;
26 import be.nikiroo.utils.Progress;
27 import be.nikiroo.utils.StringUtils;
28
29 /**
30 * This class is the base class used by the other support classes. It can be
31 * used outside of this package, and have static method that you can use to get
32 * access to the correct support class.
33 * <p>
34 * It will be used with 'resources' (usually web pages or files).
35 *
36 * @author niki
37 */
38 public abstract class BasicSupport {
39 private Document sourceNode;
40 private URL source;
41 private SupportType type;
42 private URL currentReferer; // with only one 'r', as in 'HTTP'...
43
44 static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
45 static protected BasicSupportImages bsImages = new BasicSupportImages();
46 static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
47
48 /**
49 * Check if the given resource is supported by this {@link BasicSupport}.
50 *
51 * @param url
52 * the resource to check for
53 *
54 * @return TRUE if it is
55 */
56 protected abstract boolean supports(URL url);
57
58 /**
59 * Return TRUE if the support will return HTML encoded content values for
60 * the chapters content.
61 *
62 * @return TRUE for HTML
63 */
64 protected abstract boolean isHtml();
65
66 /**
67 * Return the {@link MetaData} of this story.
68 *
69 * @return the associated {@link MetaData}, never NULL
70 *
71 * @throws IOException
72 * in case of I/O error
73 */
74 protected abstract MetaData getMeta() throws IOException;
75
76 /**
77 * Return the story description.
78 *
79 * @return the description
80 *
81 * @throws IOException
82 * in case of I/O error
83 */
84 protected abstract String getDesc() throws IOException;
85
86 /**
87 * Return the list of chapters (name and resource).
88 * <p>
89 * Can be NULL if this {@link BasicSupport} do no use chapters.
90 *
91 * @param pg
92 * the optional progress reporter
93 *
94 * @return the chapters or NULL
95 *
96 * @throws IOException
97 * in case of I/O error
98 */
99 protected abstract List<Entry<String, URL>> getChapters(Progress pg)
100 throws IOException;
101
102 /**
103 * Return the content of the chapter (possibly HTML encoded, if
104 * {@link BasicSupport#isHtml()} is TRUE).
105 *
106 * @param chapUrl
107 * the chapter {@link URL}
108 * @param number
109 * the chapter number
110 * @param pg
111 * the optional progress reporter
112 *
113 * @return the content
114 *
115 * @throws IOException
116 * in case of I/O error
117 */
118 protected abstract String getChapterContent(URL chapUrl, int number,
119 Progress pg) throws IOException;
120
121 /**
122 * Return the list of cookies (values included) that must be used to
123 * correctly fetch the resources.
124 * <p>
125 * You are expected to call the super method implementation if you override
126 * it.
127 *
128 * @return the cookies
129 */
130 public Map<String, String> getCookies() {
131 return new HashMap<String, String>();
132 }
133
134 /**
135 * OAuth authorisation (aka, "bearer XXXXXXX").
136 *
137 * @return the OAuth string
138 */
139 public String getOAuth() {
140 return null;
141 }
142
143 /**
144 * Return the canonical form of the main {@link URL}.
145 *
146 * @param source
147 * the source {@link URL}, which can be NULL
148 *
149 * @return the canonical form of this {@link URL} or NULL if the source was
150 * NULL
151 */
152 protected URL getCanonicalUrl(URL source) {
153 return source;
154 }
155
156 /**
157 * The main {@link Node} for this {@link Story}.
158 *
159 * @return the node
160 */
161 protected Element getSourceNode() {
162 return sourceNode;
163 }
164
165 /**
166 * The main {@link URL} for this {@link Story}.
167 *
168 * @return the URL
169 */
170 protected URL getSource() {
171 return source;
172 }
173
174 /**
175 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
176 * the current {@link URL} we work on.
177 *
178 * @return the referer
179 */
180 public URL getCurrentReferer() {
181 return currentReferer;
182 }
183
184 /**
185 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
186 * the current {@link URL} we work on.
187 *
188 * @param currentReferer
189 * the new referer
190 */
191 protected void setCurrentReferer(URL currentReferer) {
192 this.currentReferer = currentReferer;
193 }
194
195 /**
196 * The support type.
197 *
198 * @return the type
199 */
200 public SupportType getType() {
201 return type;
202 }
203
204 /**
205 * The support type.
206 *
207 * @param type
208 * the new type
209 */
210 protected void setType(SupportType type) {
211 this.type = type;
212 }
213
214 /**
215 * Open an input link that will be used for the support.
216 * <p>
217 * Can return NULL, in which case you are supposed to work without a source
218 * node.
219 *
220 * @param source
221 * the source {@link URL}
222 *
223 * @return the {@link InputStream}
224 *
225 * @throws IOException
226 * in case of I/O error
227 */
228 protected Document loadDocument(URL source) throws IOException {
229 String url = getCanonicalUrl(source).toString();
230 return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
231 }
232
233 /**
234 * Log into the support (can be a no-op depending upon the support).
235 *
236 * @throws IOException
237 * in case of I/O error
238 */
239 protected void login() throws IOException {
240 }
241
242 /**
243 * Now that we have processed the {@link Story}, close the resources if any.
244 */
245 protected void close() {
246 setCurrentReferer(null);
247 }
248
249 /**
250 * Process the given story resource into a partially filled {@link Story}
251 * object containing the name and metadata.
252 *
253 * @param getDesc
254 * retrieve the description of the story, or not
255 * @param pg
256 * the optional progress reporter
257 *
258 * @return the {@link Story}, never NULL
259 *
260 * @throws IOException
261 * in case of I/O error
262 */
263 protected Story processMeta(boolean getDesc, Progress pg)
264 throws IOException {
265 if (pg == null) {
266 pg = new Progress();
267 } else {
268 pg.setMinMax(0, 100);
269 }
270
271 pg.setProgress(30);
272
273 Story story = new Story();
274 MetaData meta = getMeta();
275 if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
276 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
277 }
278 story.setMeta(meta);
279 pg.put("meta", meta);
280
281 pg.setProgress(50);
282
283 if (meta.getCover() == null) {
284 meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
285 }
286
287 pg.setProgress(60);
288
289 if (getDesc) {
290 String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
291 story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
292 getDesc(), isHtml(), null));
293 }
294
295 pg.done();
296 return story;
297 }
298
299 /**
300 * Utility method to convert the given URL into a JSON object.
301 * <p>
302 * Note that this method expects small JSON files (everything is copied into
303 * memory at least twice).
304 *
305 * @param url
306 * the URL to parse
307 * @param stable
308 * TRUE for more stable resources, FALSE when they often change
309 *
310 * @return the JSON object
311 *
312 * @throws IOException
313 * in case of I/O error
314 */
315 protected JSONObject getJson(String url, boolean stable)
316 throws IOException {
317 try {
318 return getJson(new URL(url), stable);
319 } catch (MalformedURLException e) {
320 throw new IOException("Malformed URL: " + url, e);
321 }
322 }
323
324 /**
325 * Utility method to convert the given URL into a JSON object.
326 * <p>
327 * Note that this method expects small JSON files (everything is copied into
328 * memory at least twice).
329 *
330 * @param url
331 * the URL to parse
332 * @param stable
333 * TRUE for more stable resources, FALSE when they often change
334 *
335 * @return the JSON object
336 *
337 * @throws IOException
338 * in case of I/O error
339 */
340 protected JSONObject getJson(URL url, boolean stable) throws IOException {
341 InputStream in = Instance.getInstance().getCache().open(url, null,
342 stable);
343 try {
344 Scanner scan = new Scanner(in);
345 scan.useDelimiter("\0");
346 try {
347 return new JSONObject(scan.next());
348 } finally {
349 scan.close();
350 }
351 } finally {
352 in.close();
353 }
354 }
355
356 /**
357 * Process the given story resource into a fully filled {@link Story}
358 * object.
359 *
360 * @param pg
361 * the optional progress reporter
362 *
363 * @return the {@link Story}, never NULL
364 *
365 * @throws IOException
366 * in case of I/O error
367 */
368 // TODO: ADD final when BasicSupport_Deprecated is gone
369 public Story process(Progress pg) throws IOException {
370 setCurrentReferer(source);
371 login();
372 sourceNode = loadDocument(source);
373
374 try {
375 return doProcess(pg);
376 } finally {
377 close();
378 }
379 }
380
381 /**
382 * Actual processing step, without the calls to other methods.
383 * <p>
384 * Will convert the story resource into a fully filled {@link Story} object.
385 *
386 * @param pg
387 * the optional progress reporter
388 *
389 * @return the {@link Story}, never NULL
390 *
391 * @throws IOException
392 * in case of I/O error
393 */
394 protected Story doProcess(Progress pg) throws IOException {
395 if (pg == null) {
396 pg = new Progress();
397 } else {
398 pg.setMinMax(0, 100);
399 }
400
401 pg.setName("Initialising");
402
403 pg.setProgress(1);
404 Progress pgMeta = new Progress();
405 pg.addProgress(pgMeta, 10);
406 Story story = processMeta(true, pgMeta);
407 pgMeta.done(); // 10%
408 pg.put("meta", story.getMeta());
409
410 Progress pgGetChapters = new Progress();
411 pg.addProgress(pgGetChapters, 10);
412 story.setChapters(new ArrayList<Chapter>());
413 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
414 pgGetChapters.done(); // 20%
415
416 if (chapters != null) {
417 Progress pgChaps = new Progress("Extracting chapters", 0,
418 chapters.size() * 300);
419 pg.addProgress(pgChaps, 80);
420
421 long words = 0;
422 int i = 1;
423 for (Entry<String, URL> chap : chapters) {
424 pgChaps.setName("Extracting chapter " + i);
425 URL chapUrl = chap.getValue();
426 String chapName = chap.getKey();
427 if (chapUrl != null) {
428 setCurrentReferer(chapUrl);
429 }
430
431 pgChaps.setProgress(i * 100);
432 Progress pgGetChapterContent = new Progress();
433 Progress pgMakeChapter = new Progress();
434 pgChaps.addProgress(pgGetChapterContent, 100);
435 pgChaps.addProgress(pgMakeChapter, 100);
436
437 String content = getChapterContent(chapUrl, i,
438 pgGetChapterContent);
439 pgGetChapterContent.done();
440 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
441 chapName, content, isHtml(), pgMakeChapter);
442 pgMakeChapter.done();
443
444 words += cc.getWords();
445 story.getChapters().add(cc);
446 story.getMeta().setWords(words);
447
448 i++;
449 }
450
451 pgChaps.setName("Extracting chapters");
452 pgChaps.done();
453 }
454
455 pg.done();
456
457 return story;
458 }
459
460 /**
461 * Create a chapter from the given data.
462 *
463 * @param source
464 * the source URL for this content, which can be used to try and
465 * find images if images are present in the format [image-url]
466 * @param number
467 * the chapter number (0 = description)
468 * @param name
469 * the chapter name
470 * @param content
471 * the content of the chapter
472 * @return the {@link Chapter}
473 *
474 * @throws IOException
475 * in case of I/O error
476 */
477 public Chapter makeChapter(URL source, int number, String name,
478 String content) throws IOException {
479 return bsPara.makeChapter(this, source, number, name,
480 content, isHtml(), null);
481 }
482
483 /**
484 * Return a {@link BasicSupport} implementation supporting the given
485 * resource if possible.
486 *
487 * @param url
488 * the story resource
489 *
490 * @return an implementation that supports it, or NULL
491 */
492 public static BasicSupport getSupport(URL url) {
493 if (url == null) {
494 return null;
495 }
496
497 // TEXT and INFO_TEXT always support files (not URLs though)
498 for (SupportType type : SupportType.values()) {
499 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
500 BasicSupport support = getSupport(type, url);
501 if (support != null && support.supports(url)) {
502 return support;
503 }
504 }
505 }
506
507 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
508 SupportType.TEXT }) {
509 BasicSupport support = getSupport(type, url);
510 if (support != null && support.supports(url)) {
511 return support;
512 }
513 }
514
515 return null;
516 }
517
518 /**
519 * Return a {@link BasicSupport} implementation supporting the given type.
520 *
521 * @param type
522 * the type, must not be NULL
523 * @param url
524 * the {@link URL} to support (can be NULL to get an
525 * "abstract support"; if not NULL, will be used as the source
526 * URL)
527 *
528 * @return an implementation that supports it, or NULL
529 */
530 public static BasicSupport getSupport(SupportType type, URL url) {
531 BasicSupport support = null;
532
533 switch (type) {
534 case EPUB:
535 support = new Epub();
536 break;
537 case INFO_TEXT:
538 support = new InfoText();
539 break;
540 case FIMFICTION:
541 try {
542 // Can fail if no client key or NO in options
543 support = new FimfictionApi();
544 } catch (IOException e) {
545 support = new Fimfiction();
546 }
547 break;
548 case FANFICTION:
549 support = new Fanfiction();
550 break;
551 case TEXT:
552 support = new Text();
553 break;
554 case MANGAHUB:
555 support = new MangaHub();
556 break;
557 case E621:
558 support = new E621();
559 break;
560 case YIFFSTAR:
561 support = new YiffStar();
562 break;
563 case E_HENTAI:
564 support = new EHentai();
565 break;
566 case MANGA_LEL:
567 support = new MangaLel();
568 break;
569 case CBZ:
570 support = new Cbz();
571 break;
572 case HTML:
573 support = new Html();
574 break;
575 }
576
577 if (support != null) {
578 support.setType(type);
579 support.source = support.getCanonicalUrl(url);
580 }
581
582 return support;
583 }
584 }