supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.ArrayList;
   8 import java.util.Date;
   9 import java.util.HashMap;
  10 import java.util.List;
  11 import java.util.Map;
  12 import java.util.Scanner;
  13 import java.util.Map.Entry;
  14
  15 import org.json.JSONObject;
  16 import org.jsoup.helper.DataUtil;
  17 import org.jsoup.nodes.Document;
  18 import org.jsoup.nodes.Element;
  19 import org.jsoup.nodes.Node;
  20
  21 import be.nikiroo.fanfix.Instance;
  22 import be.nikiroo.fanfix.bundles.StringId;
  23 import be.nikiroo.fanfix.data.Chapter;
  24 import be.nikiroo.fanfix.data.MetaData;
  25 import be.nikiroo.fanfix.data.Story;
  26 import be.nikiroo.utils.Progress;
  27 import be.nikiroo.utils.StringUtils;
  28
  29 /**
  30  * This class is the base class used by the other support classes. It can be
  31  * used outside of this package, and has static methods that you can use to
  32  * get access to the correct support class.
  33  * <p>
  34  * It will be used with 'resources' (usually web pages or files).
  35  *
  36  * @author niki
  37  */
  38 public abstract class BasicSupport {
  39         private Document sourceNode;
  40         private URL source;
  41         private SupportType type;
  42         private URL currentReferer; // with only one 'r', as in 'HTTP'...
  43
  44         static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
  45         static protected BasicSupportImages bsImages = new BasicSupportImages();
  46         static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
  47
  48         /**
  49          * Check if the given resource is supported by this {@link BasicSupport}.
  50          *
  51          * @param url
  52          *            the resource to check for
  53          *
  54          * @return TRUE if it is
  55          */
  56         protected abstract boolean supports(URL url);
  57
  58         /**
  59          * Return TRUE if the support will return HTML encoded content values for
  60          * the chapters content.
  61          *
  62          * @return TRUE for HTML
  63          */
  64         protected abstract boolean isHtml();
  65
  66         /**
  67          * Return the {@link MetaData} of this story.
  68          *
  69          * @return the associated {@link MetaData}, never NULL
  70          *
  71          * @throws IOException
  72          *             in case of I/O error
  73          */
  74         protected abstract MetaData getMeta() throws IOException;
  75
  76         /**
  77          * Return the story description.
  78          *
  79          * @return the description
  80          *
  81          * @throws IOException
  82          *             in case of I/O error
  83          */
  84         protected abstract String getDesc() throws IOException;
  85
  86         /**
  87          * Return the list of chapters (name and resource).
  88          * <p>
  89          * Can be NULL if this {@link BasicSupport} do no use chapters.
  90          *
  91          * @param pg
  92          *            the optional progress reporter
  93          *
  94          * @return the chapters or NULL
  95          *
  96          * @throws IOException
  97          *             in case of I/O error
  98          */
  99         protected abstract List<Entry<String, URL>> getChapters(Progress pg)
 100                         throws IOException;
 101
 102         /**
 103          * Return the content of the chapter (possibly HTML encoded, if
 104          * {@link BasicSupport#isHtml()} is TRUE).
 105          *
 106          * @param chapUrl
 107          *            the chapter {@link URL}
 108          * @param number
 109          *            the chapter number
 110          * @param pg
 111          *            the optional progress reporter
 112          *
 113          * @return the content
 114          *
 115          * @throws IOException
 116          *             in case of I/O error
 117          */
 118         protected abstract String getChapterContent(URL chapUrl, int number,
 119                         Progress pg) throws IOException;
 120
 121         /**
 122          * Return the list of cookies (values included) that must be used to
 123          * correctly fetch the resources.
 124          * <p>
 125          * You are expected to call the super method implementation if you override
 126          * it.
 127          *
 128          * @return the cookies
 129          */
 130         public Map<String, String> getCookies() {
 131                 return new HashMap<String, String>();
 132         }
 133
 134         /**
 135          * OAuth authorisation (aka, "bearer XXXXXXX").
 136          *
 137          * @return the OAuth string
 138          */
 139         public String getOAuth() {
 140                 return null;
 141         }
 142
 143         /**
 144          * Return the canonical form of the main {@link URL}.
 145          *
 146          * @param source
 147          *            the source {@link URL}, which can be NULL
 148          *
 149          * @return the canonical form of this {@link URL} or NULL if the source was
 150          *         NULL
 151          */
 152         protected URL getCanonicalUrl(URL source) {
 153                 return source;
 154         }
 155
 156         /**
 157          * The main {@link Node} for this {@link Story}.
 158          *
 159          * @return the node
 160          */
 161         protected Element getSourceNode() {
 162                 return sourceNode;
 163         }
 164
 165         /**
 166          * The main {@link URL} for this {@link Story}.
 167          *
 168          * @return the URL
 169          */
 170         protected URL getSource() {
 171                 return source;
 172         }
 173
 174         /**
 175          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 176          * the current {@link URL} we work on.
 177          *
 178          * @return the referer
 179          */
 180         public URL getCurrentReferer() {
 181                 return currentReferer;
 182         }
 183
 184         /**
 185          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 186          * the current {@link URL} we work on.
 187          *
 188          * @param currentReferer
 189          *            the new referer
 190          */
 191         protected void setCurrentReferer(URL currentReferer) {
 192                 this.currentReferer = currentReferer;
 193         }
 194
 195         /**
 196          * The support type.
 197          *
 198          * @return the type
 199          */
 200         public SupportType getType() {
 201                 return type;
 202         }
 203
 204         /**
 205          * The support type.
 206          *
 207          * @param type
 208          *            the new type
 209          */
 210         protected void setType(SupportType type) {
 211                 this.type = type;
 212         }
 213
 214         /**
 215          * Open an input link that will be used for the support.
 216          * <p>
 217          * Can return NULL, in which case you are supposed to work without a source
 218          * node.
 219          *
 220          * @param source
 221          *            the source {@link URL}
 222          *
 223          * @return the {@link InputStream}
 224          *
 225          * @throws IOException
 226          *             in case of I/O error
 227          */
 228         protected Document loadDocument(URL source) throws IOException {
 229                 String url = getCanonicalUrl(source).toString();
 230                 return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
 231         }
 232
 233         /**
 234          * Log into the support (can be a no-op depending upon the support).
 235          *
 236          * @throws IOException
 237          *             in case of I/O error
 238          */
 239         protected void login() throws IOException {
 240         }
 241
 242         /**
 243          * Now that we have processed the {@link Story}, close the resources if any.
 244          */
 245         protected void close() {
 246                 setCurrentReferer(null);
 247         }
 248
 249         /**
 250          * Process the given story resource into a partially filled {@link Story}
 251          * object containing the name and metadata.
 252          *
 253          * @param getDesc
 254          *            retrieve the description of the story, or not
 255          * @param pg
 256          *            the optional progress reporter
 257          *
 258          * @return the {@link Story}, never NULL
 259          *
 260          * @throws IOException
 261          *             in case of I/O error
 262          */
 263         protected Story processMeta(boolean getDesc, Progress pg)
 264                         throws IOException {
 265                 if (pg == null) {
 266                         pg = new Progress();
 267                 } else {
 268                         pg.setMinMax(0, 100);
 269                 }
 270
 271                 pg.setProgress(30);
 272
 273                 Story story = new Story();
 274                 MetaData meta = getMeta();
 275                 if (meta.getCreationDate() == null
 276                                 || meta.getCreationDate().trim().isEmpty()) {
 277                         meta.setCreationDate(bsHelper
 278                                         .formatDate(StringUtils.fromTime(new Date().getTime())));
 279                 }
 280                 story.setMeta(meta);
 281                 pg.put("meta", meta);
 282
 283                 pg.setProgress(50);
 284
 285                 if (meta.getCover() == null) {
 286                         meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
 287                 }
 288
 289                 pg.setProgress(60);
 290
 291                 if (getDesc) {
 292                         String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
 293                         story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
 294                                         getDesc(), isHtml(), null));
 295                 }
 296
 297                 pg.done();
 298                 return story;
 299         }
 300
 301         /**
 302          * Utility method to convert the given URL into a JSON object.
 303          * <p>
 304          * Note that this method expects small JSON files (everything is copied into
 305          * memory at least twice).
 306          *
 307          * @param url
 308          *            the URL to parse
 309          * @param stable
 310          *            TRUE for more stable resources, FALSE when they often change
 311          *
 312          * @return the JSON object
 313          *
 314          * @throws IOException
 315          *             in case of I/O error
 316          */
 317         protected JSONObject getJson(String url, boolean stable)
 318                         throws IOException {
 319                 try {
 320                         return getJson(new URL(url), stable);
 321                 } catch (MalformedURLException e) {
 322                         throw new IOException("Malformed URL: " + url, e);
 323                 }
 324         }
 325
 326         /**
 327          * Utility method to convert the given URL into a JSON object.
 328          * <p>
 329          * Note that this method expects small JSON files (everything is copied into
 330          * memory at least twice).
 331          *
 332          * @param url
 333          *            the URL to parse
 334          * @param stable
 335          *            TRUE for more stable resources, FALSE when they often change
 336          *
 337          * @return the JSON object
 338          *
 339          * @throws IOException
 340          *             in case of I/O error
 341          */
 342         protected JSONObject getJson(URL url, boolean stable) throws IOException {
 343                 InputStream in = Instance.getInstance().getCache().open(url, null,
 344                                 stable);
 345                 try {
 346                         Scanner scan = new Scanner(in);
 347                         scan.useDelimiter("\0");
 348                         try {
 349                                 return new JSONObject(scan.next());
 350                         } finally {
 351                                 scan.close();
 352                         }
 353                 } finally {
 354                         in.close();
 355                 }
 356         }
 357
 358         /**
 359          * Process the given story resource into a fully filled {@link Story}
 360          * object.
 361          *
 362          * @param pg
 363          *            the optional progress reporter
 364          *
 365          * @return the {@link Story}, never NULL
 366          *
 367          * @throws IOException
 368          *             in case of I/O error
 369          */
 370         // TODO: ADD final when BasicSupport_Deprecated is gone
 371         public Story process(Progress pg) throws IOException {
 372                 setCurrentReferer(source);
 373                 login();
 374                 sourceNode = loadDocument(source);
 375
 376                 try {
 377                         Story story = doProcess(pg);
 378
 379                         // Check for "no chapters" stories
 380                         if (story.getChapters().isEmpty()
 381                                         && story.getMeta().getResume() != null
 382                                         && !story.getMeta().getResume().getParagraphs().isEmpty()) {
 383                                 Chapter resume = story.getMeta().getResume();
 384                                 resume.setName("");
 385                                 resume.setNumber(1);
 386                                 story.getChapters().add(resume);
 387                                 story.getMeta().setWords(resume.getWords());
 388
 389                                 String descChapterName = Instance.getInstance().getTrans()
 390                                                 .getString(StringId.DESCRIPTION);
 391                                 resume = new Chapter(0, descChapterName);
 392                                 story.getMeta().setResume(resume);
 393                         }
 394
 395                         return story;
 396                 } finally {
 397                         close();
 398                 }
 399         }
 400
 401         /**
 402          * Actual processing step, without the calls to other methods.
 403          * <p>
 404          * Will convert the story resource into a fully filled {@link Story} object.
 405          *
 406          * @param pg
 407          *            the optional progress reporter
 408          *
 409          * @return the {@link Story}, never NULL
 410          *
 411          * @throws IOException
 412          *             in case of I/O error
 413          */
 414         protected Story doProcess(Progress pg) throws IOException {
 415                 if (pg == null) {
 416                         pg = new Progress();
 417                 } else {
 418                         pg.setMinMax(0, 100);
 419                 }
 420
 421                 pg.setName("Initialising");
 422
 423                 pg.setProgress(1);
 424                 Progress pgMeta = new Progress();
 425                 pg.addProgress(pgMeta, 10);
 426                 Story story = processMeta(true, pgMeta);
 427                 pgMeta.done(); // 10%
 428                 pg.put("meta", story.getMeta());
 429
 430                 Progress pgGetChapters = new Progress();
 431                 pg.addProgress(pgGetChapters, 10);
 432                 story.setChapters(new ArrayList<Chapter>());
 433                 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
 434                 pgGetChapters.done(); // 20%
 435
 436                 if (chapters != null) {
 437                         Progress pgChaps = new Progress("Extracting chapters", 0,
 438                                         chapters.size() * 300);
 439                         pg.addProgress(pgChaps, 80);
 440
 441                         long words = 0;
 442                         int i = 1;
 443                         for (Entry<String, URL> chap : chapters) {
 444                                 pgChaps.setName("Extracting chapter " + i);
 445                                 URL chapUrl = chap.getValue();
 446                                 String chapName = chap.getKey();
 447                                 if (chapUrl != null) {
 448                                         setCurrentReferer(chapUrl);
 449                                 }
 450
 451                                 pgChaps.setProgress(i * 100);
 452                                 Progress pgGetChapterContent = new Progress();
 453                                 Progress pgMakeChapter = new Progress();
 454                                 pgChaps.addProgress(pgGetChapterContent, 100);
 455                                 pgChaps.addProgress(pgMakeChapter, 100);
 456
 457                                 String content = getChapterContent(chapUrl, i,
 458                                                 pgGetChapterContent);
 459                                 pgGetChapterContent.done();
 460                                 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
 461                                                 chapName, content, isHtml(), pgMakeChapter);
 462                                 pgMakeChapter.done();
 463
 464                                 words += cc.getWords();
 465                                 story.getChapters().add(cc);
 466
 467                                 i++;
 468                         }
 469
 470                         story.getMeta().setWords(words);
 471
 472                         pgChaps.setName("Extracting chapters");
 473                         pgChaps.done();
 474                 }
 475
 476                 pg.done();
 477
 478                 return story;
 479         }
 480
 481         /**
 482          * Create a chapter from the given data.
 483          *
 484          * @param source
 485          *            the source URL for this content, which can be used to try and
 486          *            find images if images are present in the format [image-url]
 487          * @param number
 488          *            the chapter number (0 = description)
 489          * @param name
 490          *            the chapter name
 491          * @param content
 492          *            the content of the chapter
 493          *
 494          * @return the {@link Chapter}, never NULL
 495          *
 496          * @throws IOException
 497          *             in case of I/O error
 498          */
 499         public Chapter makeChapter(URL source, int number, String name,
 500                         String content) throws IOException {
 501                 return bsPara.makeChapter(this, source, number, name,
 502                                 content, isHtml(), null);
 503         }
 504
 505         /**
 506          * Return a {@link BasicSupport} implementation supporting the given
 507          * resource if possible.
 508          *
 509          * @param url
 510          *            the story resource
 511          *
 512          * @return an implementation that supports it, or NULL
 513          */
 514         public static BasicSupport getSupport(URL url) {
 515                 if (url == null) {
 516                         return null;
 517                 }
 518
 519                 // TEXT and INFO_TEXT always support files (not URLs though)
 520                 for (SupportType type : SupportType.values()) {
 521                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
 522                                 BasicSupport support = getSupport(type, url);
 523                                 if (support != null && support.supports(url)) {
 524                                         return support;
 525                                 }
 526                         }
 527                 }
 528
 529                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
 530                                 SupportType.TEXT }) {
 531                         BasicSupport support = getSupport(type, url);
 532                         if (support != null && support.supports(url)) {
 533                                 return support;
 534                         }
 535                 }
 536
 537                 return null;
 538         }
 539
 540         /**
 541          * Return a {@link BasicSupport} implementation supporting the given type.
 542          *
 543          * @param type
 544          *            the type, must not be NULL
 545          * @param url
 546          *            the {@link URL} to support (can be NULL to get an
 547          *            "abstract support"; if not NULL, will be used as the source
 548          *            URL)
 549          *
 550          * @return an implementation that supports it, or NULL
 551          */
 552         public static BasicSupport getSupport(SupportType type, URL url) {
 553                 BasicSupport support = null;
 554
 555                 switch (type) {
 556                 case EPUB:
 557                         support = new Epub();
 558                         break;
 559                 case INFO_TEXT:
 560                         support = new InfoText();
 561                         break;
 562                 case FIMFICTION:
 563                         try {
 564                                 // Can fail if no client key or NO in options
 565                                 support = new FimfictionApi();
 566                         } catch (IOException e) {
 567                                 support = new Fimfiction();
 568                         }
 569                         break;
 570                 case FANFICTION:
 571                         support = new Fanfiction();
 572                         break;
 573                 case TEXT:
 574                         support = new Text();
 575                         break;
 576                 case MANGAHUB:
 577                         support = new MangaHub();
 578                         break;
 579                 case E621:
 580                         support = new E621();
 581                         break;
 582                 case YIFFSTAR:
 583                         support = new YiffStar();
 584                         break;
 585                 case E_HENTAI:
 586                         support = new EHentai();
 587                         break;
 588                 case MANGA_LEL:
 589                         support = new MangaLel();
 590                         break;
 591                 case CBZ:
 592                         support = new Cbz();
 593                         break;
 594                 case HTML:
 595                         support = new Html();
 596                         break;
 597                 }
 598
 599                 if (support != null) {
 600                         support.setType(type);
 601                         support.source = support.getCanonicalUrl(url);
 602                 }
 603
 604                 return support;
 605         }
 606 }