1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.net
.MalformedURLException
;
7 import java
.util
.ArrayList
;
9 import java
.util
.HashMap
;
10 import java
.util
.List
;
12 import java
.util
.Scanner
;
13 import java
.util
.Map
.Entry
;
15 import org
.json
.JSONException
;
16 import org
.json
.JSONObject
;
17 import org
.jsoup
.helper
.DataUtil
;
18 import org
.jsoup
.nodes
.Document
;
19 import org
.jsoup
.nodes
.Element
;
20 import org
.jsoup
.nodes
.Node
;
22 import be
.nikiroo
.fanfix
.Instance
;
23 import be
.nikiroo
.fanfix
.bundles
.StringId
;
24 import be
.nikiroo
.fanfix
.data
.Chapter
;
25 import be
.nikiroo
.fanfix
.data
.MetaData
;
26 import be
.nikiroo
.fanfix
.data
.Story
;
27 import be
.nikiroo
.utils
.Progress
;
28 import be
.nikiroo
.utils
.StringUtils
;
31 * This class is the base class used by the other support classes. It can be
32 * used outside of this package, and has static methods that you can use to get
33 * access to the correct support class.
35 * It will be used with 'resources' (usually web pages or files).
39 public abstract class BasicSupport
{
40 private Document sourceNode
;
42 private SupportType type
;
43 private URL currentReferer
; // with only one 'r', as in 'HTTP'...
45 static protected BasicSupportHelper bsHelper
= new BasicSupportHelper();
46 static protected BasicSupportImages bsImages
= new BasicSupportImages();
47 static protected BasicSupportPara bsPara
= new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
50 * Check if the given resource is supported by this {@link BasicSupport}.
53 * the resource to check for
55 * @return TRUE if it is
57 protected abstract boolean supports(URL url
);
60 * Return TRUE if the support will return HTML encoded content values for
61 * the chapters content.
63 * @return TRUE for HTML
65 protected abstract boolean isHtml();
68 * Return the {@link MetaData} of this story.
70 * @return the associated {@link MetaData}, never NULL
73 * in case of I/O error
75 protected abstract MetaData
getMeta() throws IOException
;
78 * Return the story description.
80 * @return the description
83 * in case of I/O error
85 protected abstract String
getDesc() throws IOException
;
88 * Return the list of chapters (name and resource).
90 * Can be NULL if this {@link BasicSupport} does not use chapters.
93 * the optional progress reporter
95 * @return the chapters or NULL
98 * in case of I/O error
100 protected abstract List
<Entry
<String
, URL
>> getChapters(Progress pg
)
104 * Return the content of the chapter (possibly HTML encoded, if
105 * {@link BasicSupport#isHtml()} is TRUE).
108 * the chapter {@link URL}
112 * the optional progress reporter
114 * @return the content
116 * @throws IOException
117 * in case of I/O error
119 protected abstract String
getChapterContent(URL chapUrl
, int number
,
120 Progress pg
) throws IOException
;
123 * Return the list of cookies (values included) that must be used to
124 * correctly fetch the resources.
126 * You are expected to call the super method implementation if you override
129 * @return the cookies
131 public Map
<String
, String
> getCookies() {
132 return new HashMap
<String
, String
>();
136 * OAuth authorisation (aka, "bearer XXXXXXX").
138 * @return the OAuth string
140 public String
getOAuth() {
145 * Return the canonical form of the main {@link URL}.
148 * the source {@link URL}, which can be NULL
150 * @return the canonical form of this {@link URL} or NULL if the source was NULL
153 protected URL
getCanonicalUrl(URL source
) {
158 * The main {@link Node} for this {@link Story}.
162 protected Element
getSourceNode() {
167 * The main {@link URL} for this {@link Story}.
171 protected URL
getSource() {
176 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
177 * the current {@link URL} we work on.
179 * @return the referer
181 public URL
getCurrentReferer() {
182 return currentReferer
;
186 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
187 * the current {@link URL} we work on.
189 * @param currentReferer
192 protected void setCurrentReferer(URL currentReferer
) {
193 this.currentReferer
= currentReferer
;
201 public SupportType
getType() {
211 protected void setType(SupportType type
) {
216 * Open an input link that will be used for the support.
218 * Can return NULL, in which case you are supposed to work without a source
222 * the source {@link URL}
224 * @return the {@link InputStream}
226 * @throws IOException
227 * in case of I/O error
229 protected Document
loadDocument(URL source
) throws IOException
{
230 String url
= getCanonicalUrl(source
).toString();
231 return DataUtil
.load(Instance
.getInstance().getCache().open(source
, this, false), "UTF-8", url
.toString());
235 * Log into the support (can be a no-op depending upon the support).
237 * @throws IOException
238 * in case of I/O error
240 protected void login() throws IOException
{
244 * Now that we have processed the {@link Story}, close the resources if any.
246 protected void close() {
247 setCurrentReferer(null);
251 * Process the given story resource into a partially filled {@link Story}
252 * object containing the name and metadata.
255 * retrieve the description of the story, or not
257 * the optional progress reporter
259 * @return the {@link Story}, never NULL
261 * @throws IOException
262 * in case of I/O error
264 protected Story
processMeta(boolean getDesc
, Progress pg
)
269 pg
.setMinMax(0, 100);
274 Story story
= new Story();
276 MetaData meta
= getMeta();
277 meta
.setType(getType().toString());
278 meta
.setSource(getType().getSourceName());
279 if (meta
.getPublisher() == null) {
280 meta
.setPublisher(getType().getSourceName());
283 if (meta
.getCreationDate() == null
284 || meta
.getCreationDate().trim().isEmpty()) {
285 meta
.setCreationDate(bsHelper
286 .formatDate(StringUtils
.fromTime(new Date().getTime())));
289 pg
.put("meta", meta
);
293 if (meta
.getCover() == null) {
294 meta
.setCover(bsHelper
.getDefaultCover(meta
.getSubject()));
300 String descChapterName
= Instance
.getInstance().getTrans().getString(StringId
.DESCRIPTION
);
301 story
.getMeta().setResume(bsPara
.makeChapter(this, source
, 0, descChapterName
, //
302 getDesc(), isHtml(), null));
310 * Utility method to convert the given URL into a JSON object.
312 * Note that this method expects small JSON files (everything is copied into
313 * memory at least twice).
318 * TRUE for more stable resources, FALSE when they often change
320 * @return the JSON object
322 * @throws IOException
323 * in case of I/O error
325 protected JSONObject
getJson(String url
, boolean stable
)
328 return getJson(new URL(url
), stable
);
329 } catch (MalformedURLException e
) {
330 throw new IOException("Malformed URL: " + url
, e
);
335 * Utility method to convert the given URL into a JSON object.
337 * Note that this method expects small JSON files (everything is copied into
338 * memory at least twice).
343 * TRUE for more stable resources, FALSE when they often change
345 * @return the JSON object
347 * @throws IOException
348 * in case of I/O error
350 protected JSONObject
getJson(URL url
, boolean stable
) throws IOException
{
351 InputStream in
= Instance
.getInstance().getCache().open(url
, null,
354 Scanner scan
= new Scanner(in
);
355 scan
.useDelimiter("\0");
358 throw new IOException("The given input was empty");
360 return new JSONObject(scan
.next());
361 } catch (JSONException e
) {
362 throw new IOException(e
);
372 * Process the given story resource into a fully filled {@link Story}
376 * the optional progress reporter
378 * @return the {@link Story}, never NULL
380 * @throws IOException
381 * in case of I/O error
383 // TODO: ADD final when BasicSupport_Deprecated is gone
384 public Story
process(Progress pg
) throws IOException
{
385 setCurrentReferer(source
);
387 sourceNode
= loadDocument(source
);
390 Story story
= doProcess(pg
);
392 // Check for "no chapters" stories
393 if (story
.getChapters().isEmpty()
394 && story
.getMeta().getResume() != null
395 && !story
.getMeta().getResume().getParagraphs().isEmpty()) {
396 Chapter resume
= story
.getMeta().getResume();
399 story
.getChapters().add(resume
);
400 story
.getMeta().setWords(resume
.getWords());
402 String descChapterName
= Instance
.getInstance().getTrans()
403 .getString(StringId
.DESCRIPTION
);
404 resume
= new Chapter(0, descChapterName
);
405 story
.getMeta().setResume(resume
);
415 * Actual processing step, without the calls to other methods.
417 * Will convert the story resource into a fully filled {@link Story} object.
420 * the optional progress reporter
422 * @return the {@link Story}, never NULL
424 * @throws IOException
425 * in case of I/O error
427 protected Story
doProcess(Progress pg
) throws IOException
{
431 pg
.setMinMax(0, 100);
434 pg
.setName("Initialising");
437 Progress pgMeta
= new Progress();
438 pg
.addProgress(pgMeta
, 10);
439 Story story
= processMeta(true, pgMeta
);
440 pgMeta
.done(); // 10%
441 pg
.put("meta", story
.getMeta());
443 Progress pgGetChapters
= new Progress();
444 pg
.addProgress(pgGetChapters
, 10);
445 story
.setChapters(new ArrayList
<Chapter
>());
446 List
<Entry
<String
, URL
>> chapters
= getChapters(pgGetChapters
);
447 pgGetChapters
.done(); // 20%
449 if (chapters
!= null) {
450 Progress pgChaps
= new Progress("Extracting chapters", 0,
451 chapters
.size() * 300);
452 pg
.addProgress(pgChaps
, 80);
456 for (Entry
<String
, URL
> chap
: chapters
) {
457 pgChaps
.setName("Extracting chapter " + i
);
458 URL chapUrl
= chap
.getValue();
459 String chapName
= chap
.getKey();
460 if (chapUrl
!= null) {
461 setCurrentReferer(chapUrl
);
464 pgChaps
.setProgress(i
* 100);
465 Progress pgGetChapterContent
= new Progress();
466 Progress pgMakeChapter
= new Progress();
467 pgChaps
.addProgress(pgGetChapterContent
, 100);
468 pgChaps
.addProgress(pgMakeChapter
, 100);
470 String content
= getChapterContent(chapUrl
, i
,
471 pgGetChapterContent
);
472 pgGetChapterContent
.done();
473 Chapter cc
= bsPara
.makeChapter(this, chapUrl
, i
,
474 chapName
, content
, isHtml(), pgMakeChapter
);
475 pgMakeChapter
.done();
477 words
+= cc
.getWords();
478 story
.getChapters().add(cc
);
483 story
.getMeta().setWords(words
);
485 pgChaps
.setName("Extracting chapters");
495 * Create a chapter from the given data.
498 * the source URL for this content, which can be used to try and
499 * find images if images are present in the format [image-url]
501 * the chapter number (0 = description)
505 * the content of the chapter
507 * @return the {@link Chapter}, never NULL
509 * @throws IOException
510 * in case of I/O error
512 public Chapter
makeChapter(URL source
, int number
, String name
,
513 String content
) throws IOException
{
514 return bsPara
.makeChapter(this, source
, number
, name
,
515 content
, isHtml(), null);
519 * Return a {@link BasicSupport} implementation supporting the given
520 * resource if possible.
525 * @return an implementation that supports it, or NULL
527 public static BasicSupport
getSupport(URL url
) {
532 // TEXT and INFO_TEXT always support files (not URLs though)
533 for (SupportType type
: SupportType
.values()) {
534 if (type
!= SupportType
.TEXT
&& type
!= SupportType
.INFO_TEXT
) {
535 BasicSupport support
= getSupport(type
, url
);
536 if (support
!= null && support
.supports(url
)) {
542 for (SupportType type
: new SupportType
[] { SupportType
.INFO_TEXT
,
543 SupportType
.TEXT
}) {
544 BasicSupport support
= getSupport(type
, url
);
545 if (support
!= null && support
.supports(url
)) {
554 * Return a {@link BasicSupport} implementation supporting the given type.
557 * the type, must not be NULL
559 * the {@link URL} to support (can be NULL to get an
560 * "abstract support"; if not NULL, will be used as the source
563 * @return an implementation that supports it, or NULL
565 public static BasicSupport
getSupport(SupportType type
, URL url
) {
566 BasicSupport support
= null;
570 support
= new Epub();
573 support
= new InfoText();
577 // Can fail if no client key or NO in options
578 support
= new FimfictionApi();
579 } catch (IOException e
) {
580 support
= new Fimfiction();
584 support
= new Fanfiction();
587 support
= new Text();
590 support
= new MangaHub();
593 support
= new E621();
596 support
= new YiffStar();
599 support
= new EHentai();
602 support
= new MangaLel();
608 support
= new Html();
612 if (support
!= null) {
613 support
.setType(type
);
614 support
.source
= support
.getCanonicalUrl(url
);