try +1
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.util.ArrayList;
7 import java.util.Date;
8 import java.util.HashMap;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Map.Entry;
12
13 import org.jsoup.helper.DataUtil;
14 import org.jsoup.nodes.Document;
15 import org.jsoup.nodes.Element;
16 import org.jsoup.nodes.Node;
17
18 import be.nikiroo.fanfix.Instance;
19 import be.nikiroo.fanfix.bundles.StringId;
20 import be.nikiroo.fanfix.data.Chapter;
21 import be.nikiroo.fanfix.data.MetaData;
22 import be.nikiroo.fanfix.data.Story;
23 import be.nikiroo.utils.Progress;
24 import be.nikiroo.utils.StringUtils;
25
26 /**
27 * This class is the base class used by the other support classes. It can be
28 * used outside of this package, and have static method that you can use to get
29 * access to the correct support class.
30 * <p>
31 * It will be used with 'resources' (usually web pages or files).
32 *
33 * @author niki
34 */
35 public abstract class BasicSupport {
36 private Document sourceNode;
37 private URL source;
38 private SupportType type;
39 private URL currentReferer; // with only one 'r', as in 'HTTP'...
40
41 /**
42 * The name of this support class.
43 *
44 * @return the name
45 */
46 protected abstract String getSourceName();
47
48 /**
49 * Check if the given resource is supported by this {@link BasicSupport}.
50 *
51 * @param url
52 * the resource to check for
53 *
54 * @return TRUE if it is
55 */
56 protected abstract boolean supports(URL url);
57
58 /**
59 * Return TRUE if the support will return HTML encoded content values for
60 * the chapters content.
61 *
62 * @return TRUE for HTML
63 */
64 protected abstract boolean isHtml();
65
66 /**
67 * Return the {@link MetaData} of this story.
68 *
69 * @return the associated {@link MetaData}, never NULL
70 *
71 * @throws IOException
72 * in case of I/O error
73 */
74 protected abstract MetaData getMeta() throws IOException;
75
76 /**
77 * Return the story description.
78 *
79 * @return the description
80 *
81 * @throws IOException
82 * in case of I/O error
83 */
84 protected abstract String getDesc() throws IOException;
85
86 /**
87 * Return the list of chapters (name and resource). *
88 * <p>
89 * Can be NULL if this {@link BasicSupport} do no use chapters.
90 *
91 * @param pg
92 * the optional progress reporter
93 *
94 * @return the chapters or NULL
95 *
96 * @throws IOException
97 * in case of I/O error
98 */
99 protected abstract List<Entry<String, URL>> getChapters(Progress pg)
100 throws IOException;
101
102 /**
103 * Return the content of the chapter (possibly HTML encoded, if
104 * {@link BasicSupport#isHtml()} is TRUE).
105 *
106 * @param chapUrl
107 * the chapter {@link URL}
108 * @param number
109 * the chapter number
110 * @param pg
111 * the optional progress reporter
112 *
113 * @return the content
114 *
115 * @throws IOException
116 * in case of I/O error
117 */
118 protected abstract String getChapterContent(URL chapUrl, int number,
119 Progress pg) throws IOException;
120
121 /**
122 * Return the list of cookies (values included) that must be used to
123 * correctly fetch the resources.
124 * <p>
125 * You are expected to call the super method implementation if you override
126 * it.
127 *
128 * @return the cookies
129 */
130 public Map<String, String> getCookies() {
131 return new HashMap<String, String>();
132 }
133
134 /**
135 * OAuth authorisation (aka, "bearer XXXXXXX").
136 *
137 * @return the OAuth string
138 */
139 public String getOAuth() {
140 return null;
141 }
142
143 /**
144 * Return the canonical form of the main {@link URL}.
145 *
146 * @param source
147 * the source {@link URL}, which can be NULL
148 *
149 * @return the canonical form of this {@link URL} or NULL if the source was
150 * NULL
151 */
152 protected URL getCanonicalUrl(URL source) {
153 return source;
154 }
155
156 /**
157 * The main {@link Node} for this {@link Story}.
158 *
159 * @return the node
160 */
161 protected Element getSourceNode() {
162 return sourceNode;
163 }
164
165 /**
166 * The main {@link URL} for this {@link Story}.
167 *
168 * @return the URL
169 */
170 protected URL getSource() {
171 return source;
172 }
173
174 /**
175 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
176 * the current {@link URL} we work on.
177 *
178 * @return the referer
179 */
180 public URL getCurrentReferer() {
181 return currentReferer;
182 }
183
184 /**
185 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
186 * the current {@link URL} we work on.
187 *
188 * @param currentReferer
189 * the new referer
190 */
191 protected void setCurrentReferer(URL currentReferer) {
192 this.currentReferer = currentReferer;
193 }
194
195 /**
196 * The support type.
197 *
198 * @return the type
199 */
200 public SupportType getType() {
201 return type;
202 }
203
204 /**
205 * The support type.
206 *
207 * @param type
208 * the new type
209 */
210 protected void setType(SupportType type) {
211 this.type = type;
212 }
213
214 /**
215 * Open an input link that will be used for the support.
216 * <p>
217 * Can return NULL, in which case you are supposed to work without a source
218 * node.
219 *
220 * @param source
221 * the source {@link URL}
222 *
223 * @return the {@link InputStream}
224 *
225 * @throws IOException
226 * in case of I/O error
227 */
228 protected Document loadDocument(URL source) throws IOException {
229 String url = getCanonicalUrl(source).toString();
230 return DataUtil.load(Instance.getCache().open(source, this, false),
231 "UTF-8", url.toString());
232 }
233
234 /**
235 * Log into the support (can be a no-op depending upon the support).
236 *
237 * @throws IOException
238 * in case of I/O error
239 */
240 protected void login() throws IOException {
241 }
242
243 /**
244 * Now that we have processed the {@link Story}, close the resources if any.
245 */
246 protected void close() {
247 setCurrentReferer(null);
248 }
249
250 /**
251 * Process the given story resource into a partially filled {@link Story}
252 * object containing the name and metadata, except for the description.
253 *
254 * @return the {@link Story}
255 *
256 * @throws IOException
257 * in case of I/O error
258 */
259 public final Story processMeta() throws IOException {
260 Story story = null;
261
262 try {
263 story = processMeta(false, null);
264 } finally {
265 close();
266 }
267
268 return story;
269 }
270
271 /**
272 * Process the given story resource into a partially filled {@link Story}
273 * object containing the name and metadata.
274 *
275 * @param getDesc
276 * retrieve the description of the story, or not
277 * @param pg
278 * the optional progress reporter
279 *
280 * @return the {@link Story}, never NULL
281 *
282 * @throws IOException
283 * in case of I/O error
284 */
285 protected Story processMeta(boolean getDesc, Progress pg)
286 throws IOException {
287 if (pg == null) {
288 pg = new Progress();
289 } else {
290 pg.setMinMax(0, 100);
291 }
292
293 pg.setProgress(30);
294
295 Story story = new Story();
296 MetaData meta = getMeta();
297 System.out.println("meta from support get: "+meta);
298 if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
299 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
300 }
301 story.setMeta(meta);
302
303 pg.setProgress(50);
304
305 if (meta.getCover() == null) {
306 meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject()));
307 }
308
309 pg.setProgress(60);
310
311 if (getDesc) {
312 String descChapterName = Instance.getTrans().getString(
313 StringId.DESCRIPTION);
314 story.getMeta().setResume(
315 BasicSupportPara.makeChapter(this, source, 0,
316 descChapterName, //
317 getDesc(), isHtml(), null));
318 }
319
320 pg.setProgress(100);
321 return story;
322 }
323
324 /**
325 * Actual processing step, without the calls to other methods.
326 * <p>
327 * Will convert the story resource into a fully filled {@link Story} object.
328 *
329 * @param pg
330 * the optional progress reporter
331 *
332 * @return the {@link Story}, never NULL
333 *
334 * @throws IOException
335 * in case of I/O error
336 */
337 // TODO: add final
338 public Story process(Progress pg) throws IOException {
339 setCurrentReferer(source);
340 login();
341 sourceNode = loadDocument(source);
342
343 try {
344 return doProcess(pg);
345 } finally {
346 close();
347 }
348 }
349
350 /**
351 * Process the given story resource into a fully filled {@link Story}
352 * object.
353 *
354 * @param pg
355 * the optional progress reporter
356 *
357 * @return the {@link Story}, never NULL
358 *
359 * @throws IOException
360 * in case of I/O error
361 */
362 public Story doProcess(Progress pg) throws IOException {
363 if (pg == null) {
364 pg = new Progress();
365 } else {
366 pg.setMinMax(0, 100);
367 }
368
369 pg.setProgress(1);
370 Progress pgMeta = new Progress();
371 pg.addProgress(pgMeta, 10);
372 Story story = processMeta(true, pgMeta);
373 if (!pgMeta.isDone()) {
374 pgMeta.setProgress(pgMeta.getMax()); // 10%
375 }
376
377 pg.setName("Retrieving " + story.getMeta().getTitle());
378
379 Progress pgGetChapters = new Progress();
380 pg.addProgress(pgGetChapters, 10);
381 story.setChapters(new ArrayList<Chapter>());
382 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
383 if (!pgGetChapters.isDone()) {
384 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
385 }
386
387 if (chapters != null) {
388 Progress pgChaps = new Progress("Extracting chapters", 0,
389 chapters.size() * 300);
390 pg.addProgress(pgChaps, 80);
391
392 long words = 0;
393 int i = 1;
394 for (Entry<String, URL> chap : chapters) {
395 pgChaps.setName("Extracting chapter " + i);
396 URL chapUrl = chap.getValue();
397 String chapName = chap.getKey();
398 if (chapUrl != null) {
399 setCurrentReferer(chapUrl);
400 }
401
402 pgChaps.setProgress(i * 100);
403 Progress pgGetChapterContent = new Progress();
404 Progress pgMakeChapter = new Progress();
405 pgChaps.addProgress(pgGetChapterContent, 100);
406 pgChaps.addProgress(pgMakeChapter, 100);
407
408 String content = getChapterContent(chapUrl, i,
409 pgGetChapterContent);
410 if (!pgGetChapterContent.isDone()) {
411 pgGetChapterContent.setProgress(pgGetChapterContent
412 .getMax());
413 }
414
415 Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i,
416 chapName, content, isHtml(), pgMakeChapter);
417 if (!pgMakeChapter.isDone()) {
418 pgMakeChapter.setProgress(pgMakeChapter.getMax());
419 }
420
421 words += cc.getWords();
422 story.getChapters().add(cc);
423 story.getMeta().setWords(words);
424
425 i++;
426 }
427
428 pgChaps.setName("Extracting chapters");
429 } else {
430 pg.setProgress(80);
431 }
432
433 return story;
434 }
435
436 /**
437 * Return a {@link BasicSupport} implementation supporting the given
438 * resource if possible.
439 *
440 * @param url
441 * the story resource
442 *
443 * @return an implementation that supports it, or NULL
444 */
445 public static BasicSupport getSupport(URL url) {
446 if (url == null) {
447 return null;
448 }
449
450 // TEXT and INFO_TEXT always support files (not URLs though)
451 for (SupportType type : SupportType.values()) {
452 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
453 BasicSupport support = getSupport(type, url);
454 if (support != null && support.supports(url)) {
455 return support;
456 }
457 }
458 }
459
460 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
461 SupportType.TEXT }) {
462 BasicSupport support = getSupport(type, url);
463 if (support != null && support.supports(url)) {
464 return support;
465 }
466 }
467
468 return null;
469 }
470
471 /**
472 * Return a {@link BasicSupport} implementation supporting the given type.
473 *
474 * @param type
475 * the type
476 * @param url
477 * the {@link URL} to support (can be NULL to get an
478 * "abstract support")
479 *
480 * @return an implementation that supports it, or NULL
481 */
482 public static BasicSupport getSupport(SupportType type, URL url) {
483 BasicSupport support = null;
484
485 switch (type) {
486 case EPUB:
487 support = new Epub();
488 break;
489 case INFO_TEXT:
490 support = new InfoText();
491 break;
492 case FIMFICTION:
493 try {
494 // Can fail if no client key or NO in options
495 support = new FimfictionApi();
496 } catch (IOException e) {
497 support = new Fimfiction();
498 }
499 break;
500 case FANFICTION:
501 support = new Fanfiction();
502 break;
503 case TEXT:
504 support = new Text();
505 break;
506 case MANGAFOX:
507 support = new MangaFox();
508 break;
509 case E621:
510 support = new E621();
511 break;
512 case YIFFSTAR:
513 support = new YiffStar();
514 break;
515 case E_HENTAI:
516 support = new EHentai();
517 break;
518 case CBZ:
519 support = new Cbz();
520 break;
521 case HTML:
522 support = new Html();
523 break;
524 }
525
526 if (support != null) {
527 support.setType(type);
528 support.source = support.getCanonicalUrl(url);
529 }
530
531 return support;
532 }
533 }