Fix tests
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
08fe2e33
NR
3import java.io.IOException;
4import java.io.InputStream;
08fe2e33 5import java.net.URL;
08fe2e33 6import java.util.ArrayList;
793f1071 7import java.util.Date;
08fe2e33
NR
8import java.util.HashMap;
9import java.util.List;
10import java.util.Map;
11import java.util.Map.Entry;
0ffa4754
NR
12
13import org.jsoup.helper.DataUtil;
14import org.jsoup.nodes.Document;
15import org.jsoup.nodes.Element;
16import org.jsoup.nodes.Node;
08fe2e33
NR
17
18import be.nikiroo.fanfix.Instance;
08fe2e33
NR
19import be.nikiroo.fanfix.bundles.StringId;
20import be.nikiroo.fanfix.data.Chapter;
21import be.nikiroo.fanfix.data.MetaData;
9252c65e 22import be.nikiroo.fanfix.data.Story;
3b2b638f 23import be.nikiroo.utils.Progress;
08fe2e33
NR
24import be.nikiroo.utils.StringUtils;
25
26/**
27 * This class is the base class used by the other support classes. It can be
28 * used outside of this package, and have static method that you can use to get
29 * access to the correct support class.
30 * <p>
31 * It will be used with 'resources' (usually web pages or files).
32 *
33 * @author niki
34 */
35public abstract class BasicSupport {
0ffa4754
NR
36 private Document sourceNode;
37 private URL source;
08fe2e33 38 private SupportType type;
22848428 39 private URL currentReferer; // with only one 'r', as in 'HTTP'...
08fe2e33 40
08fe2e33
NR
41 /**
42 * The name of this support class.
43 *
44 * @return the name
45 */
46 protected abstract String getSourceName();
47
48 /**
49 * Check if the given resource is supported by this {@link BasicSupport}.
50 *
51 * @param url
52 * the resource to check for
53 *
54 * @return TRUE if it is
55 */
56 protected abstract boolean supports(URL url);
57
58 /**
59 * Return TRUE if the support will return HTML encoded content values for
60 * the chapters content.
61 *
62 * @return TRUE for HTML
63 */
64 protected abstract boolean isHtml();
65
0efd25e3
NR
66 /**
67 * Return the {@link MetaData} of this story.
68 *
776ad3c6 69 * @return the associated {@link MetaData}, never NULL
0efd25e3
NR
70 *
71 * @throws IOException
72 * in case of I/O error
73 */
0ffa4754 74 protected abstract MetaData getMeta() throws IOException;
08fe2e33
NR
75
76 /**
77 * Return the story description.
78 *
08fe2e33
NR
79 * @return the description
80 *
81 * @throws IOException
82 * in case of I/O error
83 */
0ffa4754 84 protected abstract String getDesc() throws IOException;
08fe2e33 85
08fe2e33 86 /**
0ffa4754
NR
87 * Return the list of chapters (name and resource). *
88 * <p>
89 * Can be NULL if this {@link BasicSupport} do no use chapters.
08fe2e33 90 *
ed08c171
NR
91 * @param pg
92 * the optional progress reporter
08fe2e33 93 *
0ffa4754 94 * @return the chapters or NULL
08fe2e33
NR
95 *
96 * @throws IOException
97 * in case of I/O error
98 */
0ffa4754
NR
99 protected abstract List<Entry<String, URL>> getChapters(Progress pg)
100 throws IOException;
08fe2e33
NR
101
102 /**
103 * Return the content of the chapter (possibly HTML encoded, if
104 * {@link BasicSupport#isHtml()} is TRUE).
105 *
0ffa4754
NR
106 * @param chapUrl
107 * the chapter {@link URL}
08fe2e33
NR
108 * @param number
109 * the chapter number
ed08c171
NR
110 * @param pg
111 * the optional progress reporter
08fe2e33
NR
112 *
113 * @return the content
114 *
115 * @throws IOException
116 * in case of I/O error
117 */
0ffa4754
NR
118 protected abstract String getChapterContent(URL chapUrl, int number,
119 Progress pg) throws IOException;
6e06d2cc 120
08fe2e33
NR
121 /**
122 * Return the list of cookies (values included) that must be used to
123 * correctly fetch the resources.
124 * <p>
125 * You are expected to call the super method implementation if you override
126 * it.
127 *
128 * @return the cookies
129 */
315f14ae 130 public Map<String, String> getCookies() {
08fe2e33
NR
131 return new HashMap<String, String>();
132 }
133
315f14ae
NR
134 /**
135 * OAuth authorisation (aka, "bearer XXXXXXX").
136 *
137 * @return the OAuth string
138 */
139 public String getOAuth() {
140 return null;
141 }
142
a4143cd7
NR
143 /**
144 * Return the canonical form of the main {@link URL}.
145 *
146 * @param source
0ffa4754
NR
147 * the source {@link URL}, which can be NULL
148 *
149 * @return the canonical form of this {@link URL} or NULL if the source was
150 * NULL
151 */
152 protected URL getCanonicalUrl(URL source) {
153 return source;
154 }
155
156 /**
157 * The main {@link Node} for this {@link Story}.
158 *
159 * @return the node
160 */
161 protected Element getSourceNode() {
162 return sourceNode;
163 }
164
165 /**
166 * The main {@link URL} for this {@link Story}.
167 *
168 * @return the URL
169 */
170 protected URL getSource() {
171 return source;
172 }
173
174 /**
175 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
176 * the current {@link URL} we work on.
177 *
178 * @return the referer
179 */
180 public URL getCurrentReferer() {
181 return currentReferer;
182 }
183
184 /**
185 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
186 * the current {@link URL} we work on.
187 *
188 * @param currentReferer
189 * the new referer
190 */
191 protected void setCurrentReferer(URL currentReferer) {
192 this.currentReferer = currentReferer;
193 }
194
195 /**
196 * The support type.
197 *
198 * @return the type
199 */
200 public SupportType getType() {
201 return type;
202 }
203
204 /**
205 * The support type.
206 *
207 * @param type
208 * the new type
209 */
210 protected void setType(SupportType type) {
211 this.type = type;
212 }
213
214 /**
215 * Open an input link that will be used for the support.
216 * <p>
217 * Can return NULL, in which case you are supposed to work without an
218 * {@link InputStream}.
219 *
220 * @param source
a4143cd7
NR
221 * the source {@link URL}
222 *
0ffa4754
NR
223 * @return the {@link InputStream}
224 *
225 * @throws IOException
226 * in case of I/O error
227 */
228 protected Document loadDocument(URL source) throws IOException {
229 String url = getCanonicalUrl(source).toString();
230 return DataUtil.load(Instance.getCache().open(source, this, false),
231 "UTF-8", url.toString());
232 }
233
234 /**
235 * Log into the support (can be a no-op depending upon the support).
a4143cd7
NR
236 *
237 * @throws IOException
238 * in case of I/O error
239 */
315f14ae 240 @SuppressWarnings("unused")
0ffa4754
NR
241 protected void login() throws IOException {
242 }
243
244 /**
245 * Prepare the support if needed before processing.
246 *
247 * @throws IOException
248 * on I/O error
249 */
250 @SuppressWarnings("unused")
251 protected void preprocess() throws IOException {
252 }
253
254 /**
255 * Now that we have processed the {@link Story}, close the resources if any.
256 */
257 protected void close() {
258 setCurrentReferer(null);
a4143cd7
NR
259 }
260
08fe2e33
NR
261 /**
262 * Process the given story resource into a partially filled {@link Story}
263 * object containing the name and metadata, except for the description.
264 *
08fe2e33
NR
265 * @return the {@link Story}
266 *
267 * @throws IOException
268 * in case of I/O error
269 */
0ffa4754
NR
270 public Story processMeta() throws IOException {
271 Story story = null;
272
273 preprocess();
274 try {
275 story = processMeta(false, null);
276 } finally {
277 close();
278 }
279
280 return story;
08fe2e33
NR
281 }
282
283 /**
284 * Process the given story resource into a partially filled {@link Story}
285 * object containing the name and metadata.
286 *
0efd25e3
NR
287 * @param getDesc
288 * retrieve the description of the story, or not
ed08c171
NR
289 * @param pg
290 * the optional progress reporter
08fe2e33 291 *
776ad3c6 292 * @return the {@link Story}, never NULL
08fe2e33
NR
293 *
294 * @throws IOException
295 * in case of I/O error
296 */
0ffa4754
NR
297 protected Story processMeta(boolean getDesc, Progress pg)
298 throws IOException {
ed08c171
NR
299 if (pg == null) {
300 pg = new Progress();
301 } else {
302 pg.setMinMax(0, 100);
303 }
304
0ffa4754 305 pg.setProgress(30);
ed08c171 306
0ffa4754
NR
307 Story story = new Story();
308 MetaData meta = getMeta();
309 if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
310 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
311 }
312 story.setMeta(meta);
ed08c171 313
0ffa4754 314 pg.setProgress(50);
08fe2e33 315
0ffa4754
NR
316 if (meta.getCover() == null) {
317 meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject()));
318 }
08fe2e33 319
0ffa4754 320 pg.setProgress(60);
a4143cd7 321
0ffa4754
NR
322 if (getDesc) {
323 String descChapterName = Instance.getTrans().getString(
324 StringId.DESCRIPTION);
325 story.getMeta().setResume(
326 BasicSupportPara.makeChapter(this, source, 0,
327 descChapterName, //
328 getDesc(), isHtml(), null));
08fe2e33 329 }
0ffa4754
NR
330
331 pg.setProgress(100);
332 return story;
08fe2e33
NR
333 }
334
335 /**
336 * Process the given story resource into a fully filled {@link Story}
337 * object.
338 *
92fb0719
NR
339 * @param pg
340 * the optional progress reporter
08fe2e33 341 *
776ad3c6 342 * @return the {@link Story}, never NULL
08fe2e33
NR
343 *
344 * @throws IOException
345 * in case of I/O error
346 */
0ffa4754 347 public Story process(Progress pg) throws IOException {
92fb0719
NR
348 if (pg == null) {
349 pg = new Progress();
350 } else {
351 pg.setMinMax(0, 100);
352 }
353
0ffa4754
NR
354 setCurrentReferer(source);
355 login();
356 sourceNode = loadDocument(source);
357
92fb0719 358 pg.setProgress(1);
08fe2e33 359 try {
ed08c171
NR
360 Progress pgMeta = new Progress();
361 pg.addProgress(pgMeta, 10);
0ffa4754
NR
362 preprocess();
363 Story story = processMeta(true, pgMeta);
ed08c171
NR
364 if (!pgMeta.isDone()) {
365 pgMeta.setProgress(pgMeta.getMax()); // 10%
366 }
367
754a5bc2
NR
368 pg.setName("Retrieving " + story.getMeta().getTitle());
369
ed08c171
NR
370 Progress pgGetChapters = new Progress();
371 pg.addProgress(pgGetChapters, 10);
08fe2e33 372 story.setChapters(new ArrayList<Chapter>());
0ffa4754 373 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
ed08c171
NR
374 if (!pgGetChapters.isDone()) {
375 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
376 }
08fe2e33 377
08fe2e33 378 if (chapters != null) {
ed08c171
NR
379 Progress pgChaps = new Progress("Extracting chapters", 0,
380 chapters.size() * 300);
92fb0719
NR
381 pg.addProgress(pgChaps, 80);
382
793f1071 383 long words = 0;
ed08c171 384 int i = 1;
08fe2e33 385 for (Entry<String, URL> chap : chapters) {
ed08c171 386 pgChaps.setName("Extracting chapter " + i);
0ffa4754
NR
387 URL chapUrl = chap.getValue();
388 String chapName = chap.getKey();
389 if (chapUrl != null) {
390 setCurrentReferer(chapUrl);
315f14ae 391 }
ed08c171 392
0ffa4754
NR
393 pgChaps.setProgress(i * 100);
394 Progress pgGetChapterContent = new Progress();
395 Progress pgMakeChapter = new Progress();
396 pgChaps.addProgress(pgGetChapterContent, 100);
397 pgChaps.addProgress(pgMakeChapter, 100);
398
399 String content = getChapterContent(chapUrl, i,
400 pgGetChapterContent);
401 if (!pgGetChapterContent.isDone()) {
402 pgGetChapterContent.setProgress(pgGetChapterContent
403 .getMax());
404 }
ed08c171 405
0ffa4754
NR
406 Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i,
407 chapName, content, isHtml(), pgMakeChapter);
408 if (!pgMakeChapter.isDone()) {
409 pgMakeChapter.setProgress(pgMakeChapter.getMax());
08fe2e33 410 }
a6395bef 411
0ffa4754
NR
412 words += cc.getWords();
413 story.getChapters().add(cc);
414 story.getMeta().setWords(words);
415
ed08c171 416 i++;
08fe2e33 417 }
ed08c171
NR
418
419 pgChaps.setName("Extracting chapters");
92fb0719 420 } else {
ed08c171 421 pg.setProgress(80);
08fe2e33
NR
422 }
423
424 return story;
08fe2e33 425 } finally {
0ffa4754 426 close();
08fe2e33
NR
427 }
428 }
429
430 /**
0ffa4754
NR
431 * Return a {@link BasicSupport} implementation supporting the given
432 * resource if possible.
08fe2e33 433 *
0ffa4754
NR
434 * @param url
435 * the story resource
08fe2e33 436 *
0ffa4754 437 * @return an implementation that supports it, or NULL
08fe2e33 438 */
0ffa4754
NR
439 public static BasicSupport getSupport(URL url) {
440 if (url == null) {
441 return null;
442 }
08fe2e33 443
0ffa4754
NR
444 // TEXT and INFO_TEXT always support files (not URLs though)
445 for (SupportType type : SupportType.values()) {
446 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
447 BasicSupport support = getSupport(type, url);
448 if (support != null && support.supports(url)) {
449 return support;
450 }
451 }
452 }
08fe2e33 453
0ffa4754
NR
454 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
455 SupportType.TEXT }) {
456 BasicSupport support = getSupport(type, url);
457 if (support != null && support.supports(url)) {
458 return support;
459 }
460 }
461
462 return null;
08fe2e33
NR
463 }
464
465 /**
0ffa4754 466 * Return a {@link BasicSupport} implementation supporting the given type.
08fe2e33 467 *
0ffa4754
NR
468 * @param type
469 * the type
470 * @param url
471 * the {@link URL} to support (can be NULL to get an
472 * "abstract support")
08fe2e33 473 *
0ffa4754 474 * @return an implementation that supports it, or NULL
08fe2e33 475 */
0ffa4754
NR
476 public static BasicSupport getSupport(SupportType type, URL url) {
477 BasicSupport support = null;
08fe2e33 478
08fe2e33
NR
479 switch (type) {
480 case EPUB:
0ffa4754
NR
481 support = new Epub();
482 break;
08fe2e33 483 case INFO_TEXT:
0ffa4754
NR
484 support = new InfoText();
485 break;
08fe2e33 486 case FIMFICTION:
315f14ae
NR
487 try {
488 // Can fail if no client key or NO in options
0ffa4754 489 support = new FimfictionApi();
315f14ae 490 } catch (IOException e) {
0ffa4754 491 support = new Fimfiction();
315f14ae 492 }
0ffa4754 493 break;
08fe2e33 494 case FANFICTION:
0ffa4754
NR
495 support = new Fanfiction();
496 break;
08fe2e33 497 case TEXT:
0ffa4754
NR
498 support = new Text();
499 break;
08fe2e33 500 case MANGAFOX:
0ffa4754
NR
501 support = new MangaFox();
502 break;
08fe2e33 503 case E621:
0ffa4754
NR
504 support = new E621();
505 break;
a4143cd7 506 case YIFFSTAR:
0ffa4754
NR
507 support = new YiffStar();
508 break;
f0608ab1 509 case E_HENTAI:
0ffa4754
NR
510 support = new EHentai();
511 break;
08fe2e33 512 case CBZ:
0ffa4754
NR
513 support = new Cbz();
514 break;
373da363 515 case HTML:
0ffa4754
NR
516 support = new Html();
517 break;
68686a37
NR
518 }
519
0ffa4754
NR
520 if (support != null) {
521 support.setType(type);
522 support.source = support.getCanonicalUrl(url);
315f14ae
NR
523 }
524
0ffa4754 525 return support;
315f14ae 526 }
08fe2e33 527}