// Source: [nikiroo-utils.git] / supported / BasicSupport.java (commit: "make it subtree")
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.Date;
8 import java.util.HashMap;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Map.Entry;
12
13 import org.jsoup.helper.DataUtil;
14 import org.jsoup.nodes.Document;
15 import org.jsoup.nodes.Element;
16 import org.jsoup.nodes.Node;
17
18 import be.nikiroo.fanfix.Instance;
19 import be.nikiroo.fanfix.bundles.StringId;
20 import be.nikiroo.fanfix.data.Chapter;
21 import be.nikiroo.fanfix.data.MetaData;
22 import be.nikiroo.fanfix.data.Story;
23 import be.nikiroo.utils.Progress;
24 import be.nikiroo.utils.StringUtils;
25
/**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
 * <p>
 * It will be used with 'resources' (usually web pages or files).
 *
 * @author niki
 */
35 public abstract class BasicSupport {
36 private Document sourceNode;
37 private URL source;
38 private SupportType type;
39 private URL currentReferer; // with only one 'r', as in 'HTTP'...
40
41 static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
42 static protected BasicSupportImages bsImages = new BasicSupportImages();
43 static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
44
45 /**
46 * Check if the given resource is supported by this {@link BasicSupport}.
47 *
48 * @param url
49 * the resource to check for
50 *
51 * @return TRUE if it is
52 */
53 protected abstract boolean supports(URL url);
54
55 /**
56 * Return TRUE if the support will return HTML encoded content values for
57 * the chapters content.
58 *
59 * @return TRUE for HTML
60 */
61 protected abstract boolean isHtml();
62
63 /**
64 * Return the {@link MetaData} of this story.
65 *
66 * @return the associated {@link MetaData}, never NULL
67 *
68 * @throws IOException
69 * in case of I/O error
70 */
71 protected abstract MetaData getMeta() throws IOException;
72
73 /**
74 * Return the story description.
75 *
76 * @return the description
77 *
78 * @throws IOException
79 * in case of I/O error
80 */
81 protected abstract String getDesc() throws IOException;
82
83 /**
84 * Return the list of chapters (name and resource).
85 * <p>
86 * Can be NULL if this {@link BasicSupport} do no use chapters.
87 *
88 * @param pg
89 * the optional progress reporter
90 *
91 * @return the chapters or NULL
92 *
93 * @throws IOException
94 * in case of I/O error
95 */
96 protected abstract List<Entry<String, URL>> getChapters(Progress pg)
97 throws IOException;
98
99 /**
100 * Return the content of the chapter (possibly HTML encoded, if
101 * {@link BasicSupport#isHtml()} is TRUE).
102 *
103 * @param chapUrl
104 * the chapter {@link URL}
105 * @param number
106 * the chapter number
107 * @param pg
108 * the optional progress reporter
109 *
110 * @return the content
111 *
112 * @throws IOException
113 * in case of I/O error
114 */
115 protected abstract String getChapterContent(URL chapUrl, int number,
116 Progress pg) throws IOException;
117
118 /**
119 * Return the list of cookies (values included) that must be used to
120 * correctly fetch the resources.
121 * <p>
122 * You are expected to call the super method implementation if you override
123 * it.
124 *
125 * @return the cookies
126 */
127 public Map<String, String> getCookies() {
128 return new HashMap<String, String>();
129 }
130
131 /**
132 * OAuth authorisation (aka, "bearer XXXXXXX").
133 *
134 * @return the OAuth string
135 */
136 public String getOAuth() {
137 return null;
138 }
139
140 /**
141 * Return the canonical form of the main {@link URL}.
142 *
143 * @param source
144 * the source {@link URL}, which can be NULL
145 *
146 * @return the canonical form of this {@link URL} or NULL if the source was
147 * NULL
148 */
149 protected URL getCanonicalUrl(URL source) {
150 return source;
151 }
152
153 /**
154 * The main {@link Node} for this {@link Story}.
155 *
156 * @return the node
157 */
158 protected Element getSourceNode() {
159 return sourceNode;
160 }
161
162 /**
163 * The main {@link URL} for this {@link Story}.
164 *
165 * @return the URL
166 */
167 protected URL getSource() {
168 return source;
169 }
170
171 /**
172 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
173 * the current {@link URL} we work on.
174 *
175 * @return the referer
176 */
177 public URL getCurrentReferer() {
178 return currentReferer;
179 }
180
181 /**
182 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
183 * the current {@link URL} we work on.
184 *
185 * @param currentReferer
186 * the new referer
187 */
188 protected void setCurrentReferer(URL currentReferer) {
189 this.currentReferer = currentReferer;
190 }
191
192 /**
193 * The support type.
194 *
195 * @return the type
196 */
197 public SupportType getType() {
198 return type;
199 }
200
201 /**
202 * The support type.
203 *
204 * @param type
205 * the new type
206 */
207 protected void setType(SupportType type) {
208 this.type = type;
209 }
210
211 /**
212 * Open an input link that will be used for the support.
213 * <p>
214 * Can return NULL, in which case you are supposed to work without a source
215 * node.
216 *
217 * @param source
218 * the source {@link URL}
219 *
220 * @return the {@link InputStream}
221 *
222 * @throws IOException
223 * in case of I/O error
224 */
225 protected Document loadDocument(URL source) throws IOException {
226 String url = getCanonicalUrl(source).toString();
227 return DataUtil.load(Instance.getCache().open(source, this, false),
228 "UTF-8", url.toString());
229 }
230
231 /**
232 * Log into the support (can be a no-op depending upon the support).
233 *
234 * @throws IOException
235 * in case of I/O error
236 */
237 protected void login() throws IOException {
238 }
239
240 /**
241 * Now that we have processed the {@link Story}, close the resources if any.
242 */
243 protected void close() {
244 setCurrentReferer(null);
245 }
246
247 /**
248 * Process the given story resource into a partially filled {@link Story}
249 * object containing the name and metadata.
250 *
251 * @param getDesc
252 * retrieve the description of the story, or not
253 * @param pg
254 * the optional progress reporter
255 *
256 * @return the {@link Story}, never NULL
257 *
258 * @throws IOException
259 * in case of I/O error
260 */
261 protected Story processMeta(boolean getDesc, Progress pg)
262 throws IOException {
263 if (pg == null) {
264 pg = new Progress();
265 } else {
266 pg.setMinMax(0, 100);
267 }
268
269 pg.setProgress(30);
270
271 Story story = new Story();
272 MetaData meta = getMeta();
273 if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
274 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
275 }
276 story.setMeta(meta);
277
278 pg.setProgress(50);
279
280 if (meta.getCover() == null) {
281 meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
282 }
283
284 pg.setProgress(60);
285
286 if (getDesc) {
287 String descChapterName = Instance.getTrans().getString(
288 StringId.DESCRIPTION);
289 story.getMeta().setResume(
290 bsPara.makeChapter(this, source, 0,
291 descChapterName, //
292 getDesc(), isHtml(), null));
293 }
294
295 pg.done();
296 return story;
297 }
298
299 /**
300 * Process the given story resource into a fully filled {@link Story}
301 * object.
302 *
303 * @param pg
304 * the optional progress reporter
305 *
306 * @return the {@link Story}, never NULL
307 *
308 * @throws IOException
309 * in case of I/O error
310 */
311 // TODO: ADD final when BasicSupport_Deprecated is gone
312 public Story process(Progress pg) throws IOException {
313 setCurrentReferer(source);
314 login();
315 sourceNode = loadDocument(source);
316
317 try {
318 return doProcess(pg);
319 } finally {
320 close();
321 }
322 }
323
324 /**
325 * Actual processing step, without the calls to other methods.
326 * <p>
327 * Will convert the story resource into a fully filled {@link Story} object.
328 *
329 * @param pg
330 * the optional progress reporter
331 *
332 * @return the {@link Story}, never NULL
333 *
334 * @throws IOException
335 * in case of I/O error
336 */
337 protected Story doProcess(Progress pg) throws IOException {
338 if (pg == null) {
339 pg = new Progress();
340 } else {
341 pg.setMinMax(0, 100);
342 }
343
344 pg.setProgress(1);
345 Progress pgMeta = new Progress();
346 pg.addProgress(pgMeta, 10);
347 Story story = processMeta(true, pgMeta);
348 pgMeta.done(); // 10%
349
350 pg.setName("Retrieving " + story.getMeta().getTitle());
351
352 Progress pgGetChapters = new Progress();
353 pg.addProgress(pgGetChapters, 10);
354 story.setChapters(new ArrayList<Chapter>());
355 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
356 pgGetChapters.done(); // 20%
357
358 if (chapters != null) {
359 Progress pgChaps = new Progress("Extracting chapters", 0,
360 chapters.size() * 300);
361 pg.addProgress(pgChaps, 80);
362
363 long words = 0;
364 int i = 1;
365 for (Entry<String, URL> chap : chapters) {
366 pgChaps.setName("Extracting chapter " + i);
367 URL chapUrl = chap.getValue();
368 String chapName = chap.getKey();
369 if (chapUrl != null) {
370 setCurrentReferer(chapUrl);
371 }
372
373 pgChaps.setProgress(i * 100);
374 Progress pgGetChapterContent = new Progress();
375 Progress pgMakeChapter = new Progress();
376 pgChaps.addProgress(pgGetChapterContent, 100);
377 pgChaps.addProgress(pgMakeChapter, 100);
378
379 String content = getChapterContent(chapUrl, i,
380 pgGetChapterContent);
381 pgGetChapterContent.done();
382 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
383 chapName, content, isHtml(), pgMakeChapter);
384 pgMakeChapter.done();
385
386 words += cc.getWords();
387 story.getChapters().add(cc);
388 story.getMeta().setWords(words);
389
390 i++;
391 }
392
393 pgChaps.setName("Extracting chapters");
394 pgChaps.done();
395 }
396
397 pg.done();
398
399 return story;
400 }
401
402 /**
403 * Create a chapter from the given data.
404 *
405 * @param source
406 * the source URL for this content, which can be used to try and
407 * find images if images are present in the format [image-url]
408 * @param number
409 * the chapter number (0 = description)
410 * @param name
411 * the chapter name
412 * @param content
413 * the content of the chapter
414 * @return the {@link Chapter}
415 *
416 * @throws IOException
417 * in case of I/O error
418 */
419 public Chapter makeChapter(URL source, int number, String name,
420 String content) throws IOException {
421 return bsPara.makeChapter(this, source, number, name,
422 content, isHtml(), null);
423 }
424
425 /**
426 * Return a {@link BasicSupport} implementation supporting the given
427 * resource if possible.
428 *
429 * @param url
430 * the story resource
431 *
432 * @return an implementation that supports it, or NULL
433 */
434 public static BasicSupport getSupport(URL url) {
435 if (url == null) {
436 return null;
437 }
438
439 // TEXT and INFO_TEXT always support files (not URLs though)
440 for (SupportType type : SupportType.values()) {
441 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
442 BasicSupport support = getSupport(type, url);
443 if (support != null && support.supports(url)) {
444 return support;
445 }
446 }
447 }
448
449 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
450 SupportType.TEXT }) {
451 BasicSupport support = getSupport(type, url);
452 if (support != null && support.supports(url)) {
453 return support;
454 }
455 }
456
457 return null;
458 }
459
460 /**
461 * Return a {@link BasicSupport} implementation supporting the given type.
462 *
463 * @param type
464 * the type, must not be NULL
465 * @param url
466 * the {@link URL} to support (can be NULL to get an
467 * "abstract support"; if not NULL, will be used as the source
468 * URL)
469 *
470 * @return an implementation that supports it, or NULL
471 */
472 public static BasicSupport getSupport(SupportType type, URL url) {
473 BasicSupport support = null;
474
475 switch (type) {
476 case EPUB:
477 support = new Epub();
478 break;
479 case INFO_TEXT:
480 support = new InfoText();
481 break;
482 case FIMFICTION:
483 try {
484 // Can fail if no client key or NO in options
485 support = new FimfictionApi();
486 } catch (IOException e) {
487 support = new Fimfiction();
488 }
489 break;
490 case FANFICTION:
491 support = new Fanfiction();
492 break;
493 case TEXT:
494 support = new Text();
495 break;
496 case MANGAFOX:
497 support = new MangaFox();
498 break;
499 case E621:
500 support = new E621();
501 break;
502 case YIFFSTAR:
503 support = new YiffStar();
504 break;
505 case E_HENTAI:
506 support = new EHentai();
507 break;
508 case MANGA_LEL:
509 support = new MangaLel();
510 break;
511 case CBZ:
512 support = new Cbz();
513 break;
514 case HTML:
515 support = new Html();
516 break;
517 }
518
519 if (support != null) {
520 support.setType(type);
521 support.source = support.getCanonicalUrl(url);
522 }
523
524 return support;
525 }
526 }