1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.net
.MalformedURLException
;
7 import java
.util
.ArrayList
;
9 import java
.util
.HashMap
;
10 import java
.util
.List
;
12 import java
.util
.Scanner
;
13 import java
.util
.Map
.Entry
;
15 import org
.json
.JSONObject
;
16 import org
.jsoup
.helper
.DataUtil
;
17 import org
.jsoup
.nodes
.Document
;
18 import org
.jsoup
.nodes
.Element
;
19 import org
.jsoup
.nodes
.Node
;
21 import be
.nikiroo
.fanfix
.Instance
;
22 import be
.nikiroo
.fanfix
.bundles
.StringId
;
23 import be
.nikiroo
.fanfix
.data
.Chapter
;
24 import be
.nikiroo
.fanfix
.data
.MetaData
;
25 import be
.nikiroo
.fanfix
.data
.Story
;
26 import be
.nikiroo
.utils
.Progress
;
27 import be
.nikiroo
.utils
.StringUtils
;
30 * This class is the base class used by the other support classes. It can be
31 * used outside of this package, and has static methods that you can use to get
32 * access to the correct support class.
34 * It will be used with 'resources' (usually web pages or files).
38 public abstract class BasicSupport
{
39 private Document sourceNode
;
41 private SupportType type
;
42 private URL currentReferer
; // with only one 'r', as in 'HTTP'...
44 static protected BasicSupportHelper bsHelper
= new BasicSupportHelper();
45 static protected BasicSupportImages bsImages
= new BasicSupportImages();
46 static protected BasicSupportPara bsPara
= new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
49 * Check if the given resource is supported by this {@link BasicSupport}.
52 * the resource to check for
54 * @return TRUE if it is
56 protected abstract boolean supports(URL url
);
59 * Return TRUE if the support will return HTML encoded content values for
60 * the chapters content.
62 * @return TRUE for HTML
64 protected abstract boolean isHtml();
67 * Return the {@link MetaData} of this story.
69 * @return the associated {@link MetaData}, never NULL
72 * in case of I/O error
74 protected abstract MetaData
getMeta() throws IOException
;
77 * Return the story description.
79 * @return the description
82 * in case of I/O error
84 protected abstract String
getDesc() throws IOException
;
87 * Return the list of chapters (name and resource).
89 * Can be NULL if this {@link BasicSupport} does not use chapters.
92 * the optional progress reporter
94 * @return the chapters or NULL
97 * in case of I/O error
99 protected abstract List
<Entry
<String
, URL
>> getChapters(Progress pg
)
103 * Return the content of the chapter (possibly HTML encoded, if
104 * {@link BasicSupport#isHtml()} is TRUE).
107 * the chapter {@link URL}
111 * the optional progress reporter
113 * @return the content
115 * @throws IOException
116 * in case of I/O error
118 protected abstract String
getChapterContent(URL chapUrl
, int number
,
119 Progress pg
) throws IOException
;
122 * Return the list of cookies (values included) that must be used to
123 * correctly fetch the resources.
125 * You are expected to call the super method implementation if you override
128 * @return the cookies
130 public Map
<String
, String
> getCookies() {
131 return new HashMap
<String
, String
>();
135 * OAuth authorisation (aka, "bearer XXXXXXX").
137 * @return the OAuth string
139 public String
getOAuth() {
144 * Return the canonical form of the main {@link URL}.
147 * the source {@link URL}, which can be NULL
149 * @return the canonical form of this {@link URL} or NULL if the source was
152 protected URL
getCanonicalUrl(URL source
) {
157 * The main {@link Node} for this {@link Story}.
161 protected Element
getSourceNode() {
166 * The main {@link URL} for this {@link Story}.
170 protected URL
getSource() {
175 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
176 * the current {@link URL} we work on.
178 * @return the referer
180 public URL
getCurrentReferer() {
181 return currentReferer
;
185 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
186 * the current {@link URL} we work on.
188 * @param currentReferer
191 protected void setCurrentReferer(URL currentReferer
) {
192 this.currentReferer
= currentReferer
;
200 public SupportType
getType() {
210 protected void setType(SupportType type
) {
215 * Open an input link that will be used for the support.
217 * Can return NULL, in which case you are supposed to work without a source
221 * the source {@link URL}
223 * @return the {@link InputStream}
225 * @throws IOException
226 * in case of I/O error
228 protected Document
loadDocument(URL source
) throws IOException
{
229 String url
= getCanonicalUrl(source
).toString();
230 return DataUtil
.load(Instance
.getInstance().getCache().open(source
, this, false), "UTF-8", url
.toString());
234 * Log into the support (can be a no-op depending upon the support).
236 * @throws IOException
237 * in case of I/O error
239 protected void login() throws IOException
{
243 * Now that we have processed the {@link Story}, close the resources if any.
245 protected void close() {
246 setCurrentReferer(null);
250 * Process the given story resource into a partially filled {@link Story}
251 * object containing the name and metadata.
254 * retrieve the description of the story, or not
256 * the optional progress reporter
258 * @return the {@link Story}, never NULL
260 * @throws IOException
261 * in case of I/O error
263 protected Story
processMeta(boolean getDesc
, Progress pg
)
268 pg
.setMinMax(0, 100);
273 Story story
= new Story();
274 MetaData meta
= getMeta();
275 if (meta
.getCreationDate() == null || meta
.getCreationDate().isEmpty()) {
276 meta
.setCreationDate(StringUtils
.fromTime(new Date().getTime()));
279 pg
.put("meta", meta
);
283 if (meta
.getCover() == null) {
284 meta
.setCover(bsHelper
.getDefaultCover(meta
.getSubject()));
290 String descChapterName
= Instance
.getInstance().getTrans().getString(StringId
.DESCRIPTION
);
291 story
.getMeta().setResume(bsPara
.makeChapter(this, source
, 0, descChapterName
, //
292 getDesc(), isHtml(), null));
300 * Utility method to convert the given URL into a JSON object.
302 * Note that this method expects small JSON files (everything is copied into
303 * memory at least twice).
308 * TRUE for more stable resources, FALSE when they often change
310 * @return the JSON object
312 * @throws IOException
313 * in case of I/O error
315 protected JSONObject
getJson(String url
, boolean stable
)
318 return getJson(new URL(url
), stable
);
319 } catch (MalformedURLException e
) {
320 throw new IOException("Malformed URL: " + url
, e
);
325 * Utility method to convert the given URL into a JSON object.
327 * Note that this method expects small JSON files (everything is copied into
328 * memory at least twice).
333 * TRUE for more stable resources, FALSE when they often change
335 * @return the JSON object
337 * @throws IOException
338 * in case of I/O error
340 protected JSONObject
getJson(URL url
, boolean stable
) throws IOException
{
341 InputStream in
= Instance
.getInstance().getCache().open(url
, null,
344 Scanner scan
= new Scanner(in
);
345 scan
.useDelimiter("\0");
347 return new JSONObject(scan
.next());
357 * Process the given story resource into a fully filled {@link Story}
361 * the optional progress reporter
363 * @return the {@link Story}, never NULL
365 * @throws IOException
366 * in case of I/O error
368 // TODO: ADD final when BasicSupport_Deprecated is gone
369 public Story
process(Progress pg
) throws IOException
{
370 setCurrentReferer(source
);
372 sourceNode
= loadDocument(source
);
375 return doProcess(pg
);
382 * Actual processing step, without the calls to other methods.
384 * Will convert the story resource into a fully filled {@link Story} object.
387 * the optional progress reporter
389 * @return the {@link Story}, never NULL
391 * @throws IOException
392 * in case of I/O error
394 protected Story
doProcess(Progress pg
) throws IOException
{
398 pg
.setMinMax(0, 100);
401 pg
.setName("Initialising");
404 Progress pgMeta
= new Progress();
405 pg
.addProgress(pgMeta
, 10);
406 Story story
= processMeta(true, pgMeta
);
407 pgMeta
.done(); // 10%
408 pg
.put("meta", story
.getMeta());
410 Progress pgGetChapters
= new Progress();
411 pg
.addProgress(pgGetChapters
, 10);
412 story
.setChapters(new ArrayList
<Chapter
>());
413 List
<Entry
<String
, URL
>> chapters
= getChapters(pgGetChapters
);
414 pgGetChapters
.done(); // 20%
416 if (chapters
!= null) {
417 Progress pgChaps
= new Progress("Extracting chapters", 0,
418 chapters
.size() * 300);
419 pg
.addProgress(pgChaps
, 80);
423 for (Entry
<String
, URL
> chap
: chapters
) {
424 pgChaps
.setName("Extracting chapter " + i
);
425 URL chapUrl
= chap
.getValue();
426 String chapName
= chap
.getKey();
427 if (chapUrl
!= null) {
428 setCurrentReferer(chapUrl
);
431 pgChaps
.setProgress(i
* 100);
432 Progress pgGetChapterContent
= new Progress();
433 Progress pgMakeChapter
= new Progress();
434 pgChaps
.addProgress(pgGetChapterContent
, 100);
435 pgChaps
.addProgress(pgMakeChapter
, 100);
437 String content
= getChapterContent(chapUrl
, i
,
438 pgGetChapterContent
);
439 pgGetChapterContent
.done();
440 Chapter cc
= bsPara
.makeChapter(this, chapUrl
, i
,
441 chapName
, content
, isHtml(), pgMakeChapter
);
442 pgMakeChapter
.done();
444 words
+= cc
.getWords();
445 story
.getChapters().add(cc
);
446 story
.getMeta().setWords(words
);
451 pgChaps
.setName("Extracting chapters");
461 * Create a chapter from the given data.
464 * the source URL for this content, which can be used to try and
465 * find images if images are present in the format [image-url]
467 * the chapter number (0 = description)
471 * the content of the chapter
472 * @return the {@link Chapter}
474 * @throws IOException
475 * in case of I/O error
477 public Chapter
makeChapter(URL source
, int number
, String name
,
478 String content
) throws IOException
{
479 return bsPara
.makeChapter(this, source
, number
, name
,
480 content
, isHtml(), null);
484 * Return a {@link BasicSupport} implementation supporting the given
485 * resource if possible.
490 * @return an implementation that supports it, or NULL
492 public static BasicSupport
getSupport(URL url
) {
497 // TEXT and INFO_TEXT always support files (not URLs though)
498 for (SupportType type
: SupportType
.values()) {
499 if (type
!= SupportType
.TEXT
&& type
!= SupportType
.INFO_TEXT
) {
500 BasicSupport support
= getSupport(type
, url
);
501 if (support
!= null && support
.supports(url
)) {
507 for (SupportType type
: new SupportType
[] { SupportType
.INFO_TEXT
,
508 SupportType
.TEXT
}) {
509 BasicSupport support
= getSupport(type
, url
);
510 if (support
!= null && support
.supports(url
)) {
519 * Return a {@link BasicSupport} implementation supporting the given type.
522 * the type, must not be NULL
524 * the {@link URL} to support (can be NULL to get an
525 * "abstract support"; if not NULL, will be used as the source
528 * @return an implementation that supports it, or NULL
530 public static BasicSupport
getSupport(SupportType type
, URL url
) {
531 BasicSupport support
= null;
535 support
= new Epub();
538 support
= new InfoText();
542 // Can fail if no client key or NO in options
543 support
= new FimfictionApi();
544 } catch (IOException e
) {
545 support
= new Fimfiction();
549 support
= new Fanfiction();
552 support
= new Text();
555 support
= new MangaHub();
558 support
= new E621();
561 support
= new YiffStar();
564 support
= new EHentai();
567 support
= new MangaLel();
573 support
= new Html();
577 if (support
!= null) {
578 support
.setType(type
);
579 support
.source
= support
.getCanonicalUrl(url
);