fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.ArrayList;
   8 import java.util.Date;
   9 import java.util.HashMap;
  10 import java.util.List;
  11 import java.util.Map;
  12 import java.util.Scanner;
  13 import java.util.Map.Entry;
  14
  15 import org.json.JSONException;
  16 import org.json.JSONObject;
  17 import org.jsoup.helper.DataUtil;
  18 import org.jsoup.nodes.Document;
  19 import org.jsoup.nodes.Element;
  20 import org.jsoup.nodes.Node;
  21
  22 import be.nikiroo.fanfix.Instance;
  23 import be.nikiroo.fanfix.bundles.StringId;
  24 import be.nikiroo.fanfix.data.Chapter;
  25 import be.nikiroo.fanfix.data.MetaData;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.Progress;
  28 import be.nikiroo.utils.StringUtils;
  29
  30 /**
  31  * This class is the base class used by the other support classes. It can be
  32  * used outside of this package, and has static methods that you can use to get
  33  * access to the correct support class.
  34  * <p>
  35  * It will be used with 'resources' (usually web pages or files).
  36  *
  37  * @author niki
  38  */
  39 public abstract class BasicSupport {
  40         private Document sourceNode;
  41         private URL source;
  42         private SupportType type;
  43         private URL currentReferer; // with only one 'r', as in 'HTTP'...
  44
  45         static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
  46         static protected BasicSupportImages bsImages = new BasicSupportImages();
  47         static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
  48
  49         /**
  50          * Check if the given resource is supported by this {@link BasicSupport}.
  51          *
  52          * @param url
  53          *            the resource to check for
  54          *
  55          * @return TRUE if it is
  56          */
  57         protected abstract boolean supports(URL url);
  58
  59         /**
  60          * Return TRUE if the support will return HTML encoded content values for
  61          * the chapters content.
  62          *
  63          * @return TRUE for HTML
  64          */
  65         protected abstract boolean isHtml();
  66
  67         /**
  68          * Return the {@link MetaData} of this story.
  69          *
  70          * @return the associated {@link MetaData}, never NULL
  71          *
  72          * @throws IOException
  73          *             in case of I/O error
  74          */
  75         protected abstract MetaData getMeta() throws IOException;
  76
  77         /**
  78          * Return the story description.
  79          *
  80          * @return the description
  81          *
  82          * @throws IOException
  83          *             in case of I/O error
  84          */
  85         protected abstract String getDesc() throws IOException;
  86
  87         /**
  88          * Return the list of chapters (name and resource).
  89          * <p>
  90          * Can be NULL if this {@link BasicSupport} do no use chapters.
  91          *
  92          * @param pg
  93          *            the optional progress reporter
  94          *
  95          * @return the chapters or NULL
  96          *
  97          * @throws IOException
  98          *             in case of I/O error
  99          */
 100         protected abstract List<Entry<String, URL>> getChapters(Progress pg)
 101                         throws IOException;
 102
 103         /**
 104          * Return the content of the chapter (possibly HTML encoded, if
 105          * {@link BasicSupport#isHtml()} is TRUE).
 106          *
 107          * @param chapUrl
 108          *            the chapter {@link URL}
 109          * @param number
 110          *            the chapter number
 111          * @param pg
 112          *            the optional progress reporter
 113          *
 114          * @return the content
 115          *
 116          * @throws IOException
 117          *             in case of I/O error
 118          */
 119         protected abstract String getChapterContent(URL chapUrl, int number,
 120                         Progress pg) throws IOException;
 121
 122         /**
 123          * Return the list of cookies (values included) that must be used to
 124          * correctly fetch the resources.
 125          * <p>
 126          * You are expected to call the super method implementation if you override
 127          * it.
 128          *
 129          * @return the cookies
 130          */
 131         public Map<String, String> getCookies() {
 132                 return new HashMap<String, String>();
 133         }
 134
 135         /**
 136          * OAuth authorisation (aka, "bearer XXXXXXX").
 137          *
 138          * @return the OAuth string
 139          */
 140         public String getOAuth() {
 141                 return null;
 142         }
 143
 144         /**
 145          * Return the canonical form of the main {@link URL}.
 146          *
 147          * @param source
 148          *            the source {@link URL}, which can be NULL
 149          *
 150          * @return the canonical form of this {@link URL} or NULL if the source was
 151          *         NULL
 152          */
 153         protected URL getCanonicalUrl(URL source) {
 154                 return source;
 155         }
 156
 157         /**
 158          * The main {@link Node} for this {@link Story}.
 159          *
 160          * @return the node
 161          */
 162         protected Element getSourceNode() {
 163                 return sourceNode;
 164         }
 165
 166         /**
 167          * The main {@link URL} for this {@link Story}.
 168          *
 169          * @return the URL
 170          */
 171         protected URL getSource() {
 172                 return source;
 173         }
 174
 175         /**
 176          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 177          * the current {@link URL} we work on.
 178          *
 179          * @return the referer
 180          */
 181         public URL getCurrentReferer() {
 182                 return currentReferer;
 183         }
 184
 185         /**
 186          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 187          * the current {@link URL} we work on.
 188          *
 189          * @param currentReferer
 190          *            the new referer
 191          */
 192         protected void setCurrentReferer(URL currentReferer) {
 193                 this.currentReferer = currentReferer;
 194         }
 195
 196         /**
 197          * The support type.
 198          *
 199          * @return the type
 200          */
 201         public SupportType getType() {
 202                 return type;
 203         }
 204
 205         /**
 206          * The support type.
 207          *
 208          * @param type
 209          *            the new type
 210          */
 211         protected void setType(SupportType type) {
 212                 this.type = type;
 213         }
 214
 215         /**
 216          * Open an input link that will be used for the support.
 217          * <p>
 218          * Can return NULL, in which case you are supposed to work without a source
 219          * node.
 220          *
 221          * @param source
 222          *            the source {@link URL}
 223          *
 224          * @return the {@link InputStream}
 225          *
 226          * @throws IOException
 227          *             in case of I/O error
 228          */
 229         protected Document loadDocument(URL source) throws IOException {
 230                 String url = getCanonicalUrl(source).toString();
 231                 return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
 232         }
 233
 234         /**
 235          * Log into the support (can be a no-op depending upon the support).
 236          *
 237          * @throws IOException
 238          *             in case of I/O error
 239          */
 240         protected void login() throws IOException {
 241         }
 242
 243         /**
 244          * Now that we have processed the {@link Story}, close the resources if any.
 245          */
 246         protected void close() {
 247                 setCurrentReferer(null);
 248         }
 249
 250         /**
 251          * Process the given story resource into a partially filled {@link Story}
 252          * object containing the name and metadata.
 253          *
 254          * @param getDesc
 255          *            retrieve the description of the story, or not
 256          * @param pg
 257          *            the optional progress reporter
 258          *
 259          * @return the {@link Story}, never NULL
 260          *
 261          * @throws IOException
 262          *             in case of I/O error
 263          */
 264         protected Story processMeta(boolean getDesc, Progress pg)
 265                         throws IOException {
 266                 if (pg == null) {
 267                         pg = new Progress();
 268                 } else {
 269                         pg.setMinMax(0, 100);
 270                 }
 271
 272                 pg.setProgress(30);
 273
 274                 Story story = new Story();
 275                 MetaData meta = getMeta();
 276                 if (meta.getCreationDate() == null
 277                                 || meta.getCreationDate().trim().isEmpty()) {
 278                         meta.setCreationDate(bsHelper
 279                                         .formatDate(StringUtils.fromTime(new Date().getTime())));
 280                 }
 281                 story.setMeta(meta);
 282                 pg.put("meta", meta);
 283
 284                 pg.setProgress(50);
 285
 286                 if (meta.getCover() == null) {
 287                         meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
 288                 }
 289
 290                 pg.setProgress(60);
 291
 292                 if (getDesc) {
 293                         String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
 294                         story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
 295                                         getDesc(), isHtml(), null));
 296                 }
 297
 298                 pg.done();
 299                 return story;
 300         }
 301
 302         /**
 303          * Utility method to convert the given URL into a JSON object.
 304          * <p>
 305          * Note that this method expects small JSON files (everything is copied into
 306          * memory at least twice).
 307          *
 308          * @param url
 309          *            the URL to parse
 310          * @param stable
 311          *            TRUE for more stable resources, FALSE when they often change
 312          *
 313          * @return the JSON object
 314          *
 315          * @throws IOException
 316          *             in case of I/O error
 317          */
 318         protected JSONObject getJson(String url, boolean stable)
 319                         throws IOException {
 320                 try {
 321                         return getJson(new URL(url), stable);
 322                 } catch (MalformedURLException e) {
 323                         throw new IOException("Malformed URL: " + url, e);
 324                 }
 325         }
 326
 327         /**
 328          * Utility method to convert the given URL into a JSON object.
 329          * <p>
 330          * Note that this method expects small JSON files (everything is copied into
 331          * memory at least twice).
 332          *
 333          * @param url
 334          *            the URL to parse
 335          * @param stable
 336          *            TRUE for more stable resources, FALSE when they often change
 337          *
 338          * @return the JSON object
 339          *
 340          * @throws IOException
 341          *             in case of I/O error
 342          */
 343         protected JSONObject getJson(URL url, boolean stable) throws IOException {
 344                 InputStream in = Instance.getInstance().getCache().open(url, null,
 345                                 stable);
 346                 try {
 347                         Scanner scan = new Scanner(in);
 348                         scan.useDelimiter("\0");
 349                         try {
 350                                 return new JSONObject(scan.next());
 351                         } catch (JSONException e) {
 352                                 throw new IOException(e);
 353                         } finally {
 354                                 scan.close();
 355                         }
 356                 } finally {
 357                         in.close();
 358                 }
 359         }
 360
 361         /**
 362          * Process the given story resource into a fully filled {@link Story}
 363          * object.
 364          *
 365          * @param pg
 366          *            the optional progress reporter
 367          *
 368          * @return the {@link Story}, never NULL
 369          *
 370          * @throws IOException
 371          *             in case of I/O error
 372          */
 373         // TODO: ADD final when BasicSupport_Deprecated is gone
 374         public Story process(Progress pg) throws IOException {
 375                 setCurrentReferer(source);
 376                 login();
 377                 sourceNode = loadDocument(source);
 378
 379                 try {
 380                         Story story = doProcess(pg);
 381
 382                         // Check for "no chapters" stories
 383                         if (story.getChapters().isEmpty()
 384                                         && story.getMeta().getResume() != null
 385                                         && !story.getMeta().getResume().getParagraphs().isEmpty()) {
 386                                 Chapter resume = story.getMeta().getResume();
 387                                 resume.setName("");
 388                                 resume.setNumber(1);
 389                                 story.getChapters().add(resume);
 390                                 story.getMeta().setWords(resume.getWords());
 391
 392                                 String descChapterName = Instance.getInstance().getTrans()
 393                                                 .getString(StringId.DESCRIPTION);
 394                                 resume = new Chapter(0, descChapterName);
 395                                 story.getMeta().setResume(resume);
 396                         }
 397
 398                         return story;
 399                 } finally {
 400                         close();
 401                 }
 402         }
 403
 404         /**
 405          * Actual processing step, without the calls to other methods.
 406          * <p>
 407          * Will convert the story resource into a fully filled {@link Story} object.
 408          *
 409          * @param pg
 410          *            the optional progress reporter
 411          *
 412          * @return the {@link Story}, never NULL
 413          *
 414          * @throws IOException
 415          *             in case of I/O error
 416          */
 417         protected Story doProcess(Progress pg) throws IOException {
 418                 if (pg == null) {
 419                         pg = new Progress();
 420                 } else {
 421                         pg.setMinMax(0, 100);
 422                 }
 423
 424                 pg.setName("Initialising");
 425
 426                 pg.setProgress(1);
 427                 Progress pgMeta = new Progress();
 428                 pg.addProgress(pgMeta, 10);
 429                 Story story = processMeta(true, pgMeta);
 430                 pgMeta.done(); // 10%
 431                 pg.put("meta", story.getMeta());
 432
 433                 Progress pgGetChapters = new Progress();
 434                 pg.addProgress(pgGetChapters, 10);
 435                 story.setChapters(new ArrayList<Chapter>());
 436                 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
 437                 pgGetChapters.done(); // 20%
 438
 439                 if (chapters != null) {
 440                         Progress pgChaps = new Progress("Extracting chapters", 0,
 441                                         chapters.size() * 300);
 442                         pg.addProgress(pgChaps, 80);
 443
 444                         long words = 0;
 445                         int i = 1;
 446                         for (Entry<String, URL> chap : chapters) {
 447                                 pgChaps.setName("Extracting chapter " + i);
 448                                 URL chapUrl = chap.getValue();
 449                                 String chapName = chap.getKey();
 450                                 if (chapUrl != null) {
 451                                         setCurrentReferer(chapUrl);
 452                                 }
 453
 454                                 pgChaps.setProgress(i * 100);
 455                                 Progress pgGetChapterContent = new Progress();
 456                                 Progress pgMakeChapter = new Progress();
 457                                 pgChaps.addProgress(pgGetChapterContent, 100);
 458                                 pgChaps.addProgress(pgMakeChapter, 100);
 459
 460                                 String content = getChapterContent(chapUrl, i,
 461                                                 pgGetChapterContent);
 462                                 pgGetChapterContent.done();
 463                                 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
 464                                                 chapName, content, isHtml(), pgMakeChapter);
 465                                 pgMakeChapter.done();
 466
 467                                 words += cc.getWords();
 468                                 story.getChapters().add(cc);
 469
 470                                 i++;
 471                         }
 472
 473                         story.getMeta().setWords(words);
 474
 475                         pgChaps.setName("Extracting chapters");
 476                         pgChaps.done();
 477                 }
 478
 479                 pg.done();
 480
 481                 return story;
 482         }
 483
 484         /**
 485          * Create a chapter from the given data.
 486          *
 487          * @param source
 488          *            the source URL for this content, which can be used to try and
 489          *            find images if images are present in the format [image-url]
 490          * @param number
 491          *            the chapter number (0 = description)
 492          * @param name
 493          *            the chapter name
 494          * @param content
 495          *            the content of the chapter
 496          *
 497          * @return the {@link Chapter}, never NULL
 498          *
 499          * @throws IOException
 500          *             in case of I/O error
 501          */
 502         public Chapter makeChapter(URL source, int number, String name,
 503                         String content) throws IOException {
 504                 return bsPara.makeChapter(this, source, number, name,
 505                                 content, isHtml(), null);
 506         }
 507
 508         /**
 509          * Return a {@link BasicSupport} implementation supporting the given
 510          * resource if possible.
 511          *
 512          * @param url
 513          *            the story resource
 514          *
 515          * @return an implementation that supports it, or NULL
 516          */
 517         public static BasicSupport getSupport(URL url) {
 518                 if (url == null) {
 519                         return null;
 520                 }
 521
 522                 // TEXT and INFO_TEXT always support files (not URLs though)
 523                 for (SupportType type : SupportType.values()) {
 524                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
 525                                 BasicSupport support = getSupport(type, url);
 526                                 if (support != null && support.supports(url)) {
 527                                         return support;
 528                                 }
 529                         }
 530                 }
 531
 532                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
 533                                 SupportType.TEXT }) {
 534                         BasicSupport support = getSupport(type, url);
 535                         if (support != null && support.supports(url)) {
 536                                 return support;
 537                         }
 538                 }
 539
 540                 return null;
 541         }
 542
 543         /**
 544          * Return a {@link BasicSupport} implementation supporting the given type.
 545          *
 546          * @param type
 547          *            the type, must not be NULL
 548          * @param url
 549          *            the {@link URL} to support (can be NULL to get an
 550          *            "abstract support"; if not NULL, will be used as the source
 551          *            URL)
 552          *
 553          * @return an implementation that supports it, or NULL
 554          */
 555         public static BasicSupport getSupport(SupportType type, URL url) {
 556                 BasicSupport support = null;
 557
 558                 switch (type) {
 559                 case EPUB:
 560                         support = new Epub();
 561                         break;
 562                 case INFO_TEXT:
 563                         support = new InfoText();
 564                         break;
 565                 case FIMFICTION:
 566                         try {
 567                                 // Can fail if no client key or NO in options
 568                                 support = new FimfictionApi();
 569                         } catch (IOException e) {
 570                                 support = new Fimfiction();
 571                         }
 572                         break;
 573                 case FANFICTION:
 574                         support = new Fanfiction();
 575                         break;
 576                 case TEXT:
 577                         support = new Text();
 578                         break;
 579                 case MANGAHUB:
 580                         support = new MangaHub();
 581                         break;
 582                 case E621:
 583                         support = new E621();
 584                         break;
 585                 case YIFFSTAR:
 586                         support = new YiffStar();
 587                         break;
 588                 case E_HENTAI:
 589                         support = new EHentai();
 590                         break;
 591                 case MANGA_LEL:
 592                         support = new MangaLel();
 593                         break;
 594                 case CBZ:
 595                         support = new Cbz();
 596                         break;
 597                 case HTML:
 598                         support = new Html();
 599                         break;
 600                 }
 601
 602                 if (support != null) {
 603                         support.setType(type);
 604                         support.source = support.getCanonicalUrl(url);
 605                 }
 606
 607                 return support;
 608         }
 609 }