src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.ArrayList;
   8 import java.util.Date;
   9 import java.util.HashMap;
  10 import java.util.List;
  11 import java.util.Map;
  12 import java.util.Scanner;
  13 import java.util.Map.Entry;
  14
  15 import org.json.JSONException;
  16 import org.json.JSONObject;
  17 import org.jsoup.helper.DataUtil;
  18 import org.jsoup.nodes.Document;
  19 import org.jsoup.nodes.Element;
  20 import org.jsoup.nodes.Node;
  21
  22 import be.nikiroo.fanfix.Instance;
  23 import be.nikiroo.fanfix.bundles.StringId;
  24 import be.nikiroo.fanfix.data.Chapter;
  25 import be.nikiroo.fanfix.data.MetaData;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.Progress;
  28 import be.nikiroo.utils.StringUtils;
  29
  30 /**
  31  * This class is the base class used by the other support classes. It can be
  32  * used outside of this package, and have static method that you can use to get
  33  * access to the correct support class.
  34  * <p>
  35  * It will be used with 'resources' (usually web pages or files).
  36  *
  37  * @author niki
  38  */
  39 public abstract class BasicSupport {
  40         private Document sourceNode;
  41         private URL source;
  42         private SupportType type;
  43         private URL currentReferer; // with only one 'r', as in 'HTTP'...
  44
  45         static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
  46         static protected BasicSupportImages bsImages = new BasicSupportImages();
  47         static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
  48
  49         /**
  50          * Check if the given resource is supported by this {@link BasicSupport}.
  51          *
  52          * @param url
  53          *            the resource to check for
  54          *
  55          * @return TRUE if it is
  56          */
  57         protected abstract boolean supports(URL url);
  58
  59         /**
  60          * Return TRUE if the support will return HTML encoded content values for
  61          * the chapters content.
  62          *
  63          * @return TRUE for HTML
  64          */
  65         protected abstract boolean isHtml();
  66
  67         /**
  68          * Return the {@link MetaData} of this story.
  69          *
  70          * @return the associated {@link MetaData}, never NULL
  71          *
  72          * @throws IOException
  73          *             in case of I/O error
  74          */
  75         protected abstract MetaData getMeta() throws IOException;
  76
  77         /**
  78          * Return the story description.
  79          *
  80          * @return the description
  81          *
  82          * @throws IOException
  83          *             in case of I/O error
  84          */
  85         protected abstract String getDesc() throws IOException;
  86
  87         /**
  88          * Return the list of chapters (name and resource).
  89          * <p>
  90          * Can be NULL if this {@link BasicSupport} do no use chapters.
  91          *
  92          * @param pg
  93          *            the optional progress reporter
  94          *
  95          * @return the chapters or NULL
  96          *
  97          * @throws IOException
  98          *             in case of I/O error
  99          */
 100         protected abstract List<Entry<String, URL>> getChapters(Progress pg)
 101                         throws IOException;
 102
 103         /**
 104          * Return the content of the chapter (possibly HTML encoded, if
 105          * {@link BasicSupport#isHtml()} is TRUE).
 106          *
 107          * @param chapUrl
 108          *            the chapter {@link URL}
 109          * @param number
 110          *            the chapter number
 111          * @param pg
 112          *            the optional progress reporter
 113          *
 114          * @return the content
 115          *
 116          * @throws IOException
 117          *             in case of I/O error
 118          */
 119         protected abstract String getChapterContent(URL chapUrl, int number,
 120                         Progress pg) throws IOException;
 121
 122         /**
 123          * Return the list of cookies (values included) that must be used to
 124          * correctly fetch the resources.
 125          * <p>
 126          * You are expected to call the super method implementation if you override
 127          * it.
 128          *
 129          * @return the cookies
 130          */
 131         public Map<String, String> getCookies() {
 132                 return new HashMap<String, String>();
 133         }
 134
 135         /**
 136          * OAuth authorisation (aka, "bearer XXXXXXX").
 137          *
 138          * @return the OAuth string
 139          */
 140         public String getOAuth() {
 141                 return null;
 142         }
 143
 144         /**
 145          * Return the canonical form of the main {@link URL}.
 146          *
 147          * @param source
 148          *            the source {@link URL}, which can be NULL
 149          *
 150          * @return the canonical form of this {@link URL} or NULL if the source was
 151          *         NULL
 152          */
 153         protected URL getCanonicalUrl(URL source) {
 154                 return source;
 155         }
 156
 157         /**
 158          * The main {@link Node} for this {@link Story}.
 159          *
 160          * @return the node
 161          */
 162         protected Element getSourceNode() {
 163                 return sourceNode;
 164         }
 165
 166         /**
 167          * The main {@link URL} for this {@link Story}.
 168          *
 169          * @return the URL
 170          */
 171         protected URL getSource() {
 172                 return source;
 173         }
 174
 175         /**
 176          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 177          * the current {@link URL} we work on.
 178          *
 179          * @return the referer
 180          */
 181         public URL getCurrentReferer() {
 182                 return currentReferer;
 183         }
 184
 185         /**
 186          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 187          * the current {@link URL} we work on.
 188          *
 189          * @param currentReferer
 190          *            the new referer
 191          */
 192         protected void setCurrentReferer(URL currentReferer) {
 193                 this.currentReferer = currentReferer;
 194         }
 195
 196         /**
 197          * The support type.
 198          *
 199          * @return the type
 200          */
 201         public SupportType getType() {
 202                 return type;
 203         }
 204
 205         /**
 206          * The support type.
 207          *
 208          * @param type
 209          *            the new type
 210          */
 211         protected void setType(SupportType type) {
 212                 this.type = type;
 213         }
 214
 215         /**
 216          * Open an input link that will be used for the support.
 217          * <p>
 218          * Can return NULL, in which case you are supposed to work without a source
 219          * node.
 220          *
 221          * @param source
 222          *            the source {@link URL}
 223          *
 224          * @return the {@link InputStream}
 225          *
 226          * @throws IOException
 227          *             in case of I/O error
 228          */
 229         protected Document loadDocument(URL source) throws IOException {
 230                 String url = getCanonicalUrl(source).toString();
 231                 return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
 232         }
 233
 234         /**
 235          * Log into the support (can be a no-op depending upon the support).
 236          *
 237          * @throws IOException
 238          *             in case of I/O error
 239          */
 240         protected void login() throws IOException {
 241         }
 242
 243         /**
 244          * Now that we have processed the {@link Story}, close the resources if any.
 245          */
 246         protected void close() {
 247                 setCurrentReferer(null);
 248         }
 249
 250         /**
 251          * Process the given story resource into a partially filled {@link Story}
 252          * object containing the name and metadata.
 253          *
 254          * @param getDesc
 255          *            retrieve the description of the story, or not
 256          * @param pg
 257          *            the optional progress reporter
 258          *
 259          * @return the {@link Story}, never NULL
 260          *
 261          * @throws IOException
 262          *             in case of I/O error
 263          */
 264         protected Story processMeta(boolean getDesc, Progress pg)
 265                         throws IOException {
 266                 if (pg == null) {
 267                         pg = new Progress();
 268                 } else {
 269                         pg.setMinMax(0, 100);
 270                 }
 271
 272                 pg.setProgress(30);
 273
 274                 Story story = new Story();
 275
 276                 MetaData meta = getMeta();
 277                 meta.setType(getType().toString());
 278                 meta.setSource(getType().getSourceName());
 279                 if (meta.getPublisher() == null) {
 280                         meta.setPublisher(getType().getSourceName());
 281                 }
 282
 283                 if (meta.getCreationDate() == null
 284                                 || meta.getCreationDate().trim().isEmpty()) {
 285                         meta.setCreationDate(bsHelper
 286                                         .formatDate(StringUtils.fromTime(new Date().getTime())));
 287                 }
 288                 story.setMeta(meta);
 289                 pg.put("meta", meta);
 290
 291                 pg.setProgress(50);
 292
 293                 if (meta.getCover() == null) {
 294                         meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
 295                 }
 296
 297                 pg.setProgress(60);
 298
 299                 if (getDesc) {
 300                         String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
 301                         story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
 302                                         getDesc(), isHtml(), null));
 303                 }
 304
 305                 pg.done();
 306                 return story;
 307         }
 308
 309         /**
 310          * Utility method to convert the given URL into a JSON object.
 311          * <p>
 312          * Note that this method expects small JSON files (everything is copied into
 313          * memory at least twice).
 314          *
 315          * @param url
 316          *            the URL to parse
 317          * @param stable
 318          *            TRUE for more stable resources, FALSE when they often change
 319          *
 320          * @return the JSON object
 321          *
 322          * @throws IOException
 323          *             in case of I/O error
 324          */
 325         protected JSONObject getJson(String url, boolean stable)
 326                         throws IOException {
 327                 try {
 328                         return getJson(new URL(url), stable);
 329                 } catch (MalformedURLException e) {
 330                         throw new IOException("Malformed URL: " + url, e);
 331                 }
 332         }
 333
 334         /**
 335          * Utility method to convert the given URL into a JSON object.
 336          * <p>
 337          * Note that this method expects small JSON files (everything is copied into
 338          * memory at least twice).
 339          *
 340          * @param url
 341          *            the URL to parse
 342          * @param stable
 343          *            TRUE for more stable resources, FALSE when they often change
 344          *
 345          * @return the JSON object
 346          *
 347          * @throws IOException
 348          *             in case of I/O error
 349          */
 350         protected JSONObject getJson(URL url, boolean stable) throws IOException {
 351                 InputStream in = Instance.getInstance().getCache().open(url, null,
 352                                 stable);
 353                 try {
 354                         Scanner scan = new Scanner(in);
 355                         scan.useDelimiter("\0");
 356                         try {
 357                                 return new JSONObject(scan.next());
 358                         } catch (JSONException e) {
 359                                 throw new IOException(e);
 360                         } finally {
 361                                 scan.close();
 362                         }
 363                 } finally {
 364                         in.close();
 365                 }
 366         }
 367
 368         /**
 369          * Process the given story resource into a fully filled {@link Story}
 370          * object.
 371          *
 372          * @param pg
 373          *            the optional progress reporter
 374          *
 375          * @return the {@link Story}, never NULL
 376          *
 377          * @throws IOException
 378          *             in case of I/O error
 379          */
 380         // TODO: ADD final when BasicSupport_Deprecated is gone
 381         public Story process(Progress pg) throws IOException {
 382                 setCurrentReferer(source);
 383                 login();
 384                 sourceNode = loadDocument(source);
 385
 386                 try {
 387                         Story story = doProcess(pg);
 388
 389                         // Check for "no chapters" stories
 390                         if (story.getChapters().isEmpty()
 391                                         && story.getMeta().getResume() != null
 392                                         && !story.getMeta().getResume().getParagraphs().isEmpty()) {
 393                                 Chapter resume = story.getMeta().getResume();
 394                                 resume.setName("");
 395                                 resume.setNumber(1);
 396                                 story.getChapters().add(resume);
 397                                 story.getMeta().setWords(resume.getWords());
 398
 399                                 String descChapterName = Instance.getInstance().getTrans()
 400                                                 .getString(StringId.DESCRIPTION);
 401                                 resume = new Chapter(0, descChapterName);
 402                                 story.getMeta().setResume(resume);
 403                         }
 404
 405                         return story;
 406                 } finally {
 407                         close();
 408                 }
 409         }
 410
 411         /**
 412          * Actual processing step, without the calls to other methods.
 413          * <p>
 414          * Will convert the story resource into a fully filled {@link Story} object.
 415          *
 416          * @param pg
 417          *            the optional progress reporter
 418          *
 419          * @return the {@link Story}, never NULL
 420          *
 421          * @throws IOException
 422          *             in case of I/O error
 423          */
 424         protected Story doProcess(Progress pg) throws IOException {
 425                 if (pg == null) {
 426                         pg = new Progress();
 427                 } else {
 428                         pg.setMinMax(0, 100);
 429                 }
 430
 431                 pg.setName("Initialising");
 432
 433                 pg.setProgress(1);
 434                 Progress pgMeta = new Progress();
 435                 pg.addProgress(pgMeta, 10);
 436                 Story story = processMeta(true, pgMeta);
 437                 pgMeta.done(); // 10%
 438                 pg.put("meta", story.getMeta());
 439
 440                 Progress pgGetChapters = new Progress();
 441                 pg.addProgress(pgGetChapters, 10);
 442                 story.setChapters(new ArrayList<Chapter>());
 443                 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
 444                 pgGetChapters.done(); // 20%
 445
 446                 if (chapters != null) {
 447                         Progress pgChaps = new Progress("Extracting chapters", 0,
 448                                         chapters.size() * 300);
 449                         pg.addProgress(pgChaps, 80);
 450
 451                         long words = 0;
 452                         int i = 1;
 453                         for (Entry<String, URL> chap : chapters) {
 454                                 pgChaps.setName("Extracting chapter " + i);
 455                                 URL chapUrl = chap.getValue();
 456                                 String chapName = chap.getKey();
 457                                 if (chapUrl != null) {
 458                                         setCurrentReferer(chapUrl);
 459                                 }
 460
 461                                 pgChaps.setProgress(i * 100);
 462                                 Progress pgGetChapterContent = new Progress();
 463                                 Progress pgMakeChapter = new Progress();
 464                                 pgChaps.addProgress(pgGetChapterContent, 100);
 465                                 pgChaps.addProgress(pgMakeChapter, 100);
 466
 467                                 String content = getChapterContent(chapUrl, i,
 468                                                 pgGetChapterContent);
 469                                 pgGetChapterContent.done();
 470                                 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
 471                                                 chapName, content, isHtml(), pgMakeChapter);
 472                                 pgMakeChapter.done();
 473
 474                                 words += cc.getWords();
 475                                 story.getChapters().add(cc);
 476
 477                                 i++;
 478                         }
 479
 480                         story.getMeta().setWords(words);
 481
 482                         pgChaps.setName("Extracting chapters");
 483                         pgChaps.done();
 484                 }
 485
 486                 pg.done();
 487
 488                 return story;
 489         }
 490
 491         /**
 492          * Create a chapter from the given data.
 493          *
 494          * @param source
 495          *            the source URL for this content, which can be used to try and
 496          *            find images if images are present in the format [image-url]
 497          * @param number
 498          *            the chapter number (0 = description)
 499          * @param name
 500          *            the chapter name
 501          * @param content
 502          *            the content of the chapter
 503          *
 504          * @return the {@link Chapter}, never NULL
 505          *
 506          * @throws IOException
 507          *             in case of I/O error
 508          */
 509         public Chapter makeChapter(URL source, int number, String name,
 510                         String content) throws IOException {
 511                 return bsPara.makeChapter(this, source, number, name,
 512                                 content, isHtml(), null);
 513         }
 514
 515         /**
 516          * Return a {@link BasicSupport} implementation supporting the given
 517          * resource if possible.
 518          *
 519          * @param url
 520          *            the story resource
 521          *
 522          * @return an implementation that supports it, or NULL
 523          */
 524         public static BasicSupport getSupport(URL url) {
 525                 if (url == null) {
 526                         return null;
 527                 }
 528
 529                 // TEXT and INFO_TEXT always support files (not URLs though)
 530                 for (SupportType type : SupportType.values()) {
 531                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
 532                                 BasicSupport support = getSupport(type, url);
 533                                 if (support != null && support.supports(url)) {
 534                                         return support;
 535                                 }
 536                         }
 537                 }
 538
 539                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
 540                                 SupportType.TEXT }) {
 541                         BasicSupport support = getSupport(type, url);
 542                         if (support != null && support.supports(url)) {
 543                                 return support;
 544                         }
 545                 }
 546
 547                 return null;
 548         }
 549
 550         /**
 551          * Return a {@link BasicSupport} implementation supporting the given type.
 552          *
 553          * @param type
 554          *            the type, must not be NULL
 555          * @param url
 556          *            the {@link URL} to support (can be NULL to get an
 557          *            "abstract support"; if not NULL, will be used as the source
 558          *            URL)
 559          *
 560          * @return an implementation that supports it, or NULL
 561          */
 562         public static BasicSupport getSupport(SupportType type, URL url) {
 563                 BasicSupport support = null;
 564
 565                 switch (type) {
 566                 case EPUB:
 567                         support = new Epub();
 568                         break;
 569                 case INFO_TEXT:
 570                         support = new InfoText();
 571                         break;
 572                 case FIMFICTION:
 573                         try {
 574                                 // Can fail if no client key or NO in options
 575                                 support = new FimfictionApi();
 576                         } catch (IOException e) {
 577                                 support = new Fimfiction();
 578                         }
 579                         break;
 580                 case FANFICTION:
 581                         support = new Fanfiction();
 582                         break;
 583                 case TEXT:
 584                         support = new Text();
 585                         break;
 586                 case MANGAHUB:
 587                         support = new MangaHub();
 588                         break;
 589                 case E621:
 590                         support = new E621();
 591                         break;
 592                 case YIFFSTAR:
 593                         support = new YiffStar();
 594                         break;
 595                 case E_HENTAI:
 596                         support = new EHentai();
 597                         break;
 598                 case MANGA_LEL:
 599                         support = new MangaLel();
 600                         break;
 601                 case CBZ:
 602                         support = new Cbz();
 603                         break;
 604                 case HTML:
 605                         support = new Html();
 606                         break;
 607                 }
 608
 609                 if (support != null) {
 610                         support.setType(type);
 611                         support.source = support.getCanonicalUrl(url);
 612                 }
 613
 614                 return support;
 615         }
 616 }