src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.ArrayList;
   8 import java.util.Date;
   9 import java.util.HashMap;
  10 import java.util.List;
  11 import java.util.Map;
  12 import java.util.Scanner;
  13 import java.util.Map.Entry;
  14
  15 import org.json.JSONException;
  16 import org.json.JSONObject;
  17 import org.jsoup.helper.DataUtil;
  18 import org.jsoup.nodes.Document;
  19 import org.jsoup.nodes.Element;
  20 import org.jsoup.nodes.Node;
  21
  22 import be.nikiroo.fanfix.Instance;
  23 import be.nikiroo.fanfix.bundles.StringId;
  24 import be.nikiroo.fanfix.data.Chapter;
  25 import be.nikiroo.fanfix.data.MetaData;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.Progress;
  28 import be.nikiroo.utils.StringUtils;
  29
  30 /**
  31  * This class is the base class used by the other support classes. It can be
  32  * used outside of this package, and have static method that you can use to get
  33  * access to the correct support class.
  34  * <p>
  35  * It will be used with 'resources' (usually web pages or files).
  36  *
  37  * @author niki
  38  */
  39 public abstract class BasicSupport {
  40         private Document sourceNode;
  41         private URL source;
  42         private SupportType type;
  43         private URL currentReferer; // with only one 'r', as in 'HTTP'...
  44
  45         static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
  46         static protected BasicSupportImages bsImages = new BasicSupportImages();
  47         static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
  48
  49         /**
  50          * Check if the given resource is supported by this {@link BasicSupport}.
  51          *
  52          * @param url
  53          *            the resource to check for
  54          *
  55          * @return TRUE if it is
  56          */
  57         protected abstract boolean supports(URL url);
  58
  59         /**
  60          * Return TRUE if the support will return HTML encoded content values for
  61          * the chapters content.
  62          *
  63          * @return TRUE for HTML
  64          */
  65         protected abstract boolean isHtml();
  66
  67         /**
  68          * Return the {@link MetaData} of this story.
  69          *
  70          * @return the associated {@link MetaData}, never NULL
  71          *
  72          * @throws IOException
  73          *             in case of I/O error
  74          */
  75         protected abstract MetaData getMeta() throws IOException;
  76
  77         /**
  78          * Return the story description.
  79          *
  80          * @return the description
  81          *
  82          * @throws IOException
  83          *             in case of I/O error
  84          */
  85         protected abstract String getDesc() throws IOException;
  86
  87         /**
  88          * Return the list of chapters (name and resource).
  89          * <p>
  90          * Can be NULL if this {@link BasicSupport} do no use chapters.
  91          *
  92          * @param pg
  93          *            the optional progress reporter
  94          *
  95          * @return the chapters or NULL
  96          *
  97          * @throws IOException
  98          *             in case of I/O error
  99          */
 100         protected abstract List<Entry<String, URL>> getChapters(Progress pg)
 101                         throws IOException;
 102
 103         /**
 104          * Return the content of the chapter (possibly HTML encoded, if
 105          * {@link BasicSupport#isHtml()} is TRUE).
 106          *
 107          * @param chapUrl
 108          *            the chapter {@link URL}
 109          * @param number
 110          *            the chapter number
 111          * @param pg
 112          *            the optional progress reporter
 113          *
 114          * @return the content
 115          *
 116          * @throws IOException
 117          *             in case of I/O error
 118          */
 119         protected abstract String getChapterContent(URL chapUrl, int number,
 120                         Progress pg) throws IOException;
 121
 122         /**
 123          * Return the list of cookies (values included) that must be used to
 124          * correctly fetch the resources.
 125          * <p>
 126          * You are expected to call the super method implementation if you override
 127          * it.
 128          *
 129          * @return the cookies
 130          */
 131         public Map<String, String> getCookies() {
 132                 return new HashMap<String, String>();
 133         }
 134
 135         /**
 136          * OAuth authorisation (aka, "bearer XXXXXXX").
 137          *
 138          * @return the OAuth string
 139          */
 140         public String getOAuth() {
 141                 return null;
 142         }
 143
 144         /**
 145          * Return the canonical form of the main {@link URL}.
 146          *
 147          * @param source
 148          *            the source {@link URL}, which can be NULL
 149          *
 150          * @return the canonical form of this {@link URL} or NULL if the source was
 151          *         NULL
 152          */
 153         protected URL getCanonicalUrl(URL source) {
 154                 return source;
 155         }
 156
 157         /**
 158          * The main {@link Node} for this {@link Story}.
 159          *
 160          * @return the node
 161          */
 162         protected Element getSourceNode() {
 163                 return sourceNode;
 164         }
 165
 166         /**
 167          * The main {@link URL} for this {@link Story}.
 168          *
 169          * @return the URL
 170          */
 171         protected URL getSource() {
 172                 return source;
 173         }
 174
 175         /**
 176          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 177          * the current {@link URL} we work on.
 178          *
 179          * @return the referer
 180          */
 181         public URL getCurrentReferer() {
 182                 return currentReferer;
 183         }
 184
 185         /**
 186          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 187          * the current {@link URL} we work on.
 188          *
 189          * @param currentReferer
 190          *            the new referer
 191          */
 192         protected void setCurrentReferer(URL currentReferer) {
 193                 this.currentReferer = currentReferer;
 194         }
 195
 196         /**
 197          * The support type.
 198          *
 199          * @return the type
 200          */
 201         public SupportType getType() {
 202                 return type;
 203         }
 204
 205         /**
 206          * The support type.
 207          *
 208          * @param type
 209          *            the new type
 210          */
 211         protected void setType(SupportType type) {
 212                 this.type = type;
 213         }
 214
 215         /**
 216          * Open an input link that will be used for the support.
 217          * <p>
 218          * Can return NULL, in which case you are supposed to work without a source
 219          * node.
 220          *
 221          * @param source
 222          *            the source {@link URL}
 223          *
 224          * @return the {@link InputStream}
 225          *
 226          * @throws IOException
 227          *             in case of I/O error
 228          */
 229         protected Document loadDocument(URL source) throws IOException {
 230                 String url = getCanonicalUrl(source).toString();
 231                 return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
 232         }
 233
 234         /**
 235          * Log into the support (can be a no-op depending upon the support).
 236          *
 237          * @throws IOException
 238          *             in case of I/O error
 239          */
 240         protected void login() throws IOException {
 241         }
 242
 243         /**
 244          * Now that we have processed the {@link Story}, close the resources if any.
 245          */
 246         protected void close() {
 247                 setCurrentReferer(null);
 248         }
 249
 250         /**
 251          * Process the given story resource into a partially filled {@link Story}
 252          * object containing the name and metadata.
 253          *
 254          * @param getDesc
 255          *            retrieve the description of the story, or not
 256          * @param pg
 257          *            the optional progress reporter
 258          *
 259          * @return the {@link Story}, never NULL
 260          *
 261          * @throws IOException
 262          *             in case of I/O error
 263          */
 264         protected Story processMeta(boolean getDesc, Progress pg)
 265                         throws IOException {
 266                 if (pg == null) {
 267                         pg = new Progress();
 268                 } else {
 269                         pg.setMinMax(0, 100);
 270                 }
 271
 272                 pg.setProgress(30);
 273
 274                 Story story = new Story();
 275
 276                 MetaData meta = getMeta();
 277                 meta.setType(getType().toString());
 278                 meta.setSource(getType().getSourceName());
 279                 meta.setPublisher(getType().getSourceName());
 280
 281                 if (meta.getCreationDate() == null
 282                                 || meta.getCreationDate().trim().isEmpty()) {
 283                         meta.setCreationDate(bsHelper
 284                                         .formatDate(StringUtils.fromTime(new Date().getTime())));
 285                 }
 286                 story.setMeta(meta);
 287                 pg.put("meta", meta);
 288
 289                 pg.setProgress(50);
 290
 291                 if (meta.getCover() == null) {
 292                         meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
 293                 }
 294
 295                 pg.setProgress(60);
 296
 297                 if (getDesc) {
 298                         String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
 299                         story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
 300                                         getDesc(), isHtml(), null));
 301                 }
 302
 303                 pg.done();
 304                 return story;
 305         }
 306
 307         /**
 308          * Utility method to convert the given URL into a JSON object.
 309          * <p>
 310          * Note that this method expects small JSON files (everything is copied into
 311          * memory at least twice).
 312          *
 313          * @param url
 314          *            the URL to parse
 315          * @param stable
 316          *            TRUE for more stable resources, FALSE when they often change
 317          *
 318          * @return the JSON object
 319          *
 320          * @throws IOException
 321          *             in case of I/O error
 322          */
 323         protected JSONObject getJson(String url, boolean stable)
 324                         throws IOException {
 325                 try {
 326                         return getJson(new URL(url), stable);
 327                 } catch (MalformedURLException e) {
 328                         throw new IOException("Malformed URL: " + url, e);
 329                 }
 330         }
 331
 332         /**
 333          * Utility method to convert the given URL into a JSON object.
 334          * <p>
 335          * Note that this method expects small JSON files (everything is copied into
 336          * memory at least twice).
 337          *
 338          * @param url
 339          *            the URL to parse
 340          * @param stable
 341          *            TRUE for more stable resources, FALSE when they often change
 342          *
 343          * @return the JSON object
 344          *
 345          * @throws IOException
 346          *             in case of I/O error
 347          */
 348         protected JSONObject getJson(URL url, boolean stable) throws IOException {
 349                 InputStream in = Instance.getInstance().getCache().open(url, null,
 350                                 stable);
 351                 try {
 352                         Scanner scan = new Scanner(in);
 353                         scan.useDelimiter("\0");
 354                         try {
 355                                 return new JSONObject(scan.next());
 356                         } catch (JSONException e) {
 357                                 throw new IOException(e);
 358                         } finally {
 359                                 scan.close();
 360                         }
 361                 } finally {
 362                         in.close();
 363                 }
 364         }
 365
 366         /**
 367          * Process the given story resource into a fully filled {@link Story}
 368          * object.
 369          *
 370          * @param pg
 371          *            the optional progress reporter
 372          *
 373          * @return the {@link Story}, never NULL
 374          *
 375          * @throws IOException
 376          *             in case of I/O error
 377          */
 378         // TODO: ADD final when BasicSupport_Deprecated is gone
 379         public Story process(Progress pg) throws IOException {
 380                 setCurrentReferer(source);
 381                 login();
 382                 sourceNode = loadDocument(source);
 383
 384                 try {
 385                         Story story = doProcess(pg);
 386
 387                         // Check for "no chapters" stories
 388                         if (story.getChapters().isEmpty()
 389                                         && story.getMeta().getResume() != null
 390                                         && !story.getMeta().getResume().getParagraphs().isEmpty()) {
 391                                 Chapter resume = story.getMeta().getResume();
 392                                 resume.setName("");
 393                                 resume.setNumber(1);
 394                                 story.getChapters().add(resume);
 395                                 story.getMeta().setWords(resume.getWords());
 396
 397                                 String descChapterName = Instance.getInstance().getTrans()
 398                                                 .getString(StringId.DESCRIPTION);
 399                                 resume = new Chapter(0, descChapterName);
 400                                 story.getMeta().setResume(resume);
 401                         }
 402
 403                         return story;
 404                 } finally {
 405                         close();
 406                 }
 407         }
 408
 409         /**
 410          * Actual processing step, without the calls to other methods.
 411          * <p>
 412          * Will convert the story resource into a fully filled {@link Story} object.
 413          *
 414          * @param pg
 415          *            the optional progress reporter
 416          *
 417          * @return the {@link Story}, never NULL
 418          *
 419          * @throws IOException
 420          *             in case of I/O error
 421          */
 422         protected Story doProcess(Progress pg) throws IOException {
 423                 if (pg == null) {
 424                         pg = new Progress();
 425                 } else {
 426                         pg.setMinMax(0, 100);
 427                 }
 428
 429                 pg.setName("Initialising");
 430
 431                 pg.setProgress(1);
 432                 Progress pgMeta = new Progress();
 433                 pg.addProgress(pgMeta, 10);
 434                 Story story = processMeta(true, pgMeta);
 435                 pgMeta.done(); // 10%
 436                 pg.put("meta", story.getMeta());
 437
 438                 Progress pgGetChapters = new Progress();
 439                 pg.addProgress(pgGetChapters, 10);
 440                 story.setChapters(new ArrayList<Chapter>());
 441                 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
 442                 pgGetChapters.done(); // 20%
 443
 444                 if (chapters != null) {
 445                         Progress pgChaps = new Progress("Extracting chapters", 0,
 446                                         chapters.size() * 300);
 447                         pg.addProgress(pgChaps, 80);
 448
 449                         long words = 0;
 450                         int i = 1;
 451                         for (Entry<String, URL> chap : chapters) {
 452                                 pgChaps.setName("Extracting chapter " + i);
 453                                 URL chapUrl = chap.getValue();
 454                                 String chapName = chap.getKey();
 455                                 if (chapUrl != null) {
 456                                         setCurrentReferer(chapUrl);
 457                                 }
 458
 459                                 pgChaps.setProgress(i * 100);
 460                                 Progress pgGetChapterContent = new Progress();
 461                                 Progress pgMakeChapter = new Progress();
 462                                 pgChaps.addProgress(pgGetChapterContent, 100);
 463                                 pgChaps.addProgress(pgMakeChapter, 100);
 464
 465                                 String content = getChapterContent(chapUrl, i,
 466                                                 pgGetChapterContent);
 467                                 pgGetChapterContent.done();
 468                                 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
 469                                                 chapName, content, isHtml(), pgMakeChapter);
 470                                 pgMakeChapter.done();
 471
 472                                 words += cc.getWords();
 473                                 story.getChapters().add(cc);
 474
 475                                 i++;
 476                         }
 477
 478                         story.getMeta().setWords(words);
 479
 480                         pgChaps.setName("Extracting chapters");
 481                         pgChaps.done();
 482                 }
 483
 484                 pg.done();
 485
 486                 return story;
 487         }
 488
 489         /**
 490          * Create a chapter from the given data.
 491          *
 492          * @param source
 493          *            the source URL for this content, which can be used to try and
 494          *            find images if images are present in the format [image-url]
 495          * @param number
 496          *            the chapter number (0 = description)
 497          * @param name
 498          *            the chapter name
 499          * @param content
 500          *            the content of the chapter
 501          *
 502          * @return the {@link Chapter}, never NULL
 503          *
 504          * @throws IOException
 505          *             in case of I/O error
 506          */
 507         public Chapter makeChapter(URL source, int number, String name,
 508                         String content) throws IOException {
 509                 return bsPara.makeChapter(this, source, number, name,
 510                                 content, isHtml(), null);
 511         }
 512
 513         /**
 514          * Return a {@link BasicSupport} implementation supporting the given
 515          * resource if possible.
 516          *
 517          * @param url
 518          *            the story resource
 519          *
 520          * @return an implementation that supports it, or NULL
 521          */
 522         public static BasicSupport getSupport(URL url) {
 523                 if (url == null) {
 524                         return null;
 525                 }
 526
 527                 // TEXT and INFO_TEXT always support files (not URLs though)
 528                 for (SupportType type : SupportType.values()) {
 529                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
 530                                 BasicSupport support = getSupport(type, url);
 531                                 if (support != null && support.supports(url)) {
 532                                         return support;
 533                                 }
 534                         }
 535                 }
 536
 537                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
 538                                 SupportType.TEXT }) {
 539                         BasicSupport support = getSupport(type, url);
 540                         if (support != null && support.supports(url)) {
 541                                 return support;
 542                         }
 543                 }
 544
 545                 return null;
 546         }
 547
 548         /**
 549          * Return a {@link BasicSupport} implementation supporting the given type.
 550          *
 551          * @param type
 552          *            the type, must not be NULL
 553          * @param url
 554          *            the {@link URL} to support (can be NULL to get an
 555          *            "abstract support"; if not NULL, will be used as the source
 556          *            URL)
 557          *
 558          * @return an implementation that supports it, or NULL
 559          */
 560         public static BasicSupport getSupport(SupportType type, URL url) {
 561                 BasicSupport support = null;
 562
 563                 switch (type) {
 564                 case EPUB:
 565                         support = new Epub();
 566                         break;
 567                 case INFO_TEXT:
 568                         support = new InfoText();
 569                         break;
 570                 case FIMFICTION:
 571                         try {
 572                                 // Can fail if no client key or NO in options
 573                                 support = new FimfictionApi();
 574                         } catch (IOException e) {
 575                                 support = new Fimfiction();
 576                         }
 577                         break;
 578                 case FANFICTION:
 579                         support = new Fanfiction();
 580                         break;
 581                 case TEXT:
 582                         support = new Text();
 583                         break;
 584                 case MANGAHUB:
 585                         support = new MangaHub();
 586                         break;
 587                 case E621:
 588                         support = new E621();
 589                         break;
 590                 case YIFFSTAR:
 591                         support = new YiffStar();
 592                         break;
 593                 case E_HENTAI:
 594                         support = new EHentai();
 595                         break;
 596                 case MANGA_LEL:
 597                         support = new MangaLel();
 598                         break;
 599                 case CBZ:
 600                         support = new Cbz();
 601                         break;
 602                 case HTML:
 603                         support = new Html();
 604                         break;
 605                 }
 606
 607                 if (support != null) {
 608                         support.setType(type);
 609                         support.source = support.getCanonicalUrl(url);
 610                 }
 611
 612                 return support;
 613         }
 614 }