supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.ArrayList;
   8 import java.util.Date;
   9 import java.util.HashMap;
  10 import java.util.List;
  11 import java.util.Map;
  12 import java.util.Scanner;
  13 import java.util.Map.Entry;
  14
  15 import org.json.JSONException;
  16 import org.json.JSONObject;
  17 import org.jsoup.helper.DataUtil;
  18 import org.jsoup.nodes.Document;
  19 import org.jsoup.nodes.Element;
  20 import org.jsoup.nodes.Node;
  21
  22 import be.nikiroo.fanfix.Instance;
  23 import be.nikiroo.fanfix.bundles.StringId;
  24 import be.nikiroo.fanfix.data.Chapter;
  25 import be.nikiroo.fanfix.data.MetaData;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.Progress;
  28 import be.nikiroo.utils.StringUtils;
  29
  30 /**
  31  * This class is the base class used by the other support classes. It can be
  32  * used outside of this package, and have static method that you can use to get
  33  * access to the correct support class.
  34  * <p>
  35  * It will be used with 'resources' (usually web pages or files).
  36  *
  37  * @author niki
  38  */
  39 public abstract class BasicSupport {
        // Parsed document of the main resource; assigned by process(Progress)
        // from loadDocument(URL) and exposed through getSourceNode().
        private Document sourceNode;
        // Main URL of the story; assigned by getSupport(SupportType, URL) with
        // the canonical form of the URL it was given.
        private URL source;
        // Kind of support this instance represents, see setType(SupportType).
        private SupportType type;
        private URL currentReferer; // with only one 'r', as in 'HTTP'...

        // Shared helpers, accessible from this class and its subclasses.
        // NOTE(review): bsPara is created with its own fresh helper instances
        // instead of reusing bsHelper and bsImages -- confirm this is intended.
        static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
        static protected BasicSupportImages bsImages = new BasicSupportImages();
        static protected BasicSupportPara bsPara = new BasicSupportPara(new BasicSupportHelper(), new BasicSupportImages());
  48
        /**
         * Check if the given resource is supported by this {@link BasicSupport}.
         * <p>
         * {@link BasicSupport#getSupport(URL)} uses this method to select the
         * implementation that will handle a resource.
         *
         * @param url
         *            the resource to check for
         *
         * @return TRUE if it is
         */
        protected abstract boolean supports(URL url);
  58
        /**
         * Return TRUE if the support will return HTML encoded content values for
         * the chapters content.
         * <p>
         * The value is passed along with the content every time a
         * {@link Chapter} is built by this class.
         *
         * @return TRUE for HTML
         */
        protected abstract boolean isHtml();
  66
        /**
         * Return the {@link MetaData} of this story.
         * <p>
         * {@link BasicSupport#processMeta(boolean, Progress)} uses the returned
         * object without checking it for NULL, then completes it (type, source,
         * and, when they are missing, publisher, creation date and cover).
         *
         * @return the associated {@link MetaData}, never NULL
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract MetaData getMeta() throws IOException;
  76
        /**
         * Return the story description.
         * <p>
         * When a description is requested,
         * {@link BasicSupport#processMeta(boolean, Progress)} turns this text into
         * the chapter number 0 that is stored as the resume of the story.
         *
         * @return the description
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getDesc() throws IOException;
  86
        /**
         * Return the list of chapters (name and resource).
         * <p>
         * The key of each entry is the chapter name and its value is the chapter
         * {@link URL}.
         * <p>
         * Can be NULL if this {@link BasicSupport} does not use chapters, in
         * which case no chapter is extracted by
         * {@link BasicSupport#doProcess(Progress)}.
         *
         * @param pg
         *            the optional progress reporter
         *
         * @return the chapters or NULL
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract List<Entry<String, URL>> getChapters(Progress pg)
                        throws IOException;
 102
        /**
         * Return the content of the chapter (possibly HTML encoded, if
         * {@link BasicSupport#isHtml()} is TRUE).
         *
         * @param chapUrl
         *            the chapter {@link URL}, passed as-is from the entries
         *            returned by {@link BasicSupport#getChapters(Progress)}
         * @param number
         *            the chapter number (the first chapter is number 1)
         * @param pg
         *            the optional progress reporter
         *
         * @return the content
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getChapterContent(URL chapUrl, int number,
                        Progress pg) throws IOException;
 121
 122         /**
 123          * Return the list of cookies (values included) that must be used to
 124          * correctly fetch the resources.
 125          * <p>
 126          * You are expected to call the super method implementation if you override
 127          * it.
 128          *
 129          * @return the cookies
 130          */
 131         public Map<String, String> getCookies() {
 132                 return new HashMap<String, String>();
 133         }
 134
        /**
         * OAuth authorisation (aka, "bearer XXXXXXX").
         * <p>
         * This base implementation does not provide any and returns NULL.
         *
         * @return the OAuth string, or NULL if there is none
         */
        public String getOAuth() {
                return null;
        }
 143
        /**
         * Return the canonical form of the main {@link URL}.
         * <p>
         * This base implementation returns the given {@link URL} unchanged.
         *
         * @param source
         *            the source {@link URL}, which can be NULL
         *
         * @return the canonical form of this {@link URL} or NULL if the source was
         *         NULL
         */
        protected URL getCanonicalUrl(URL source) {
                return source;
        }
 156
 157         /**
 158          * The main {@link Node} for this {@link Story}.
 159          *
 160          * @return the node
 161          */
 162         protected Element getSourceNode() {
 163                 return sourceNode;
 164         }
 165
 166         /**
 167          * The main {@link URL} for this {@link Story}.
 168          *
 169          * @return the URL
 170          */
 171         protected URL getSource() {
 172                 return source;
 173         }
 174
 175         /**
 176          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 177          * the current {@link URL} we work on.
 178          *
 179          * @return the referer
 180          */
 181         public URL getCurrentReferer() {
 182                 return currentReferer;
 183         }
 184
        /**
         * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
         * the current {@link URL} we work on.
         *
         * @param currentReferer
         *            the new referer (NULL is used to reset it)
         */
        protected void setCurrentReferer(URL currentReferer) {
                this.currentReferer = currentReferer;
        }
 195
 196         /**
 197          * The support type.
 198          *
 199          * @return the type
 200          */
 201         public SupportType getType() {
 202                 return type;
 203         }
 204
        /**
         * The support type.
         * <p>
         * It is set by {@link BasicSupport#getSupport(SupportType, URL)} when the
         * support is created.
         *
         * @param type
         *            the new type
         */
        protected void setType(SupportType type) {
                this.type = type;
        }
 214
 215         /**
 216          * Open an input link that will be used for the support.
 217          * <p>
 218          * Can return NULL, in which case you are supposed to work without a source
 219          * node.
 220          *
 221          * @param source
 222          *            the source {@link URL}
 223          *
 224          * @return the {@link InputStream}
 225          *
 226          * @throws IOException
 227          *             in case of I/O error
 228          */
 229         protected Document loadDocument(URL source) throws IOException {
 230                 String url = getCanonicalUrl(source).toString();
 231                 return DataUtil.load(Instance.getInstance().getCache().open(source, this, false), "UTF-8", url.toString());
 232         }
 233
        /**
         * Log into the support (can be a no-op depending upon the support).
         * <p>
         * This base implementation does nothing; it is called by
         * {@link BasicSupport#process(Progress)} before the main resource is
         * loaded.
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected void login() throws IOException {
        }
 242
        /**
         * Now that we have processed the {@link Story}, close the resources if any.
         * <p>
         * This base implementation only resets the current referer to NULL; it is
         * called by {@link BasicSupport#process(Progress)} once the processing is
         * over, whether it succeeded or not.
         */
        protected void close() {
                setCurrentReferer(null);
        }
 249
 250         /**
 251          * Process the given story resource into a partially filled {@link Story}
 252          * object containing the name and metadata.
 253          *
 254          * @param getDesc
 255          *            retrieve the description of the story, or not
 256          * @param pg
 257          *            the optional progress reporter
 258          *
 259          * @return the {@link Story}, never NULL
 260          *
 261          * @throws IOException
 262          *             in case of I/O error
 263          */
 264         protected Story processMeta(boolean getDesc, Progress pg)
 265                         throws IOException {
 266                 if (pg == null) {
 267                         pg = new Progress();
 268                 } else {
 269                         pg.setMinMax(0, 100);
 270                 }
 271
 272                 pg.setProgress(30);
 273
 274                 Story story = new Story();
 275
 276                 MetaData meta = getMeta();
 277                 meta.setType(getType().toString());
 278                 meta.setSource(getType().getSourceName());
 279                 if (meta.getPublisher() == null) {
 280                         meta.setPublisher(getType().getSourceName());
 281                 }
 282
 283                 if (meta.getCreationDate() == null
 284                                 || meta.getCreationDate().trim().isEmpty()) {
 285                         meta.setCreationDate(bsHelper
 286                                         .formatDate(StringUtils.fromTime(new Date().getTime())));
 287                 }
 288                 story.setMeta(meta);
 289                 pg.put("meta", meta);
 290
 291                 pg.setProgress(50);
 292
 293                 if (meta.getCover() == null) {
 294                         meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
 295                 }
 296
 297                 pg.setProgress(60);
 298
 299                 if (getDesc) {
 300                         String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
 301                         story.getMeta().setResume(bsPara.makeChapter(this, source, 0, descChapterName, //
 302                                         getDesc(), isHtml(), null));
 303                 }
 304
 305                 pg.done();
 306                 return story;
 307         }
 308
 309         /**
 310          * Utility method to convert the given URL into a JSON object.
 311          * <p>
 312          * Note that this method expects small JSON files (everything is copied into
 313          * memory at least twice).
 314          *
 315          * @param url
 316          *            the URL to parse
 317          * @param stable
 318          *            TRUE for more stable resources, FALSE when they often change
 319          *
 320          * @return the JSON object
 321          *
 322          * @throws IOException
 323          *             in case of I/O error
 324          */
 325         protected JSONObject getJson(String url, boolean stable)
 326                         throws IOException {
 327                 try {
 328                         return getJson(new URL(url), stable);
 329                 } catch (MalformedURLException e) {
 330                         throw new IOException("Malformed URL: " + url, e);
 331                 }
 332         }
 333
 334         /**
 335          * Utility method to convert the given URL into a JSON object.
 336          * <p>
 337          * Note that this method expects small JSON files (everything is copied into
 338          * memory at least twice).
 339          *
 340          * @param url
 341          *            the URL to parse
 342          * @param stable
 343          *            TRUE for more stable resources, FALSE when they often change
 344          *
 345          * @return the JSON object
 346          *
 347          * @throws IOException
 348          *             in case of I/O error
 349          */
 350         protected JSONObject getJson(URL url, boolean stable) throws IOException {
 351                 InputStream in = Instance.getInstance().getCache().open(url, null,
 352                                 stable);
 353                 try {
 354                         Scanner scan = new Scanner(in);
 355                         scan.useDelimiter("\0");
 356                         try {
 357                                 if(!scan.hasNext())
 358                                         throw new IOException("The given input was empty");
 359
 360                                 return new JSONObject(scan.next());
 361                         } catch (JSONException e) {
 362                                 throw new IOException(e);
 363                         } finally {
 364                                 scan.close();
 365                         }
 366                 } finally {
 367                         in.close();
 368                 }
 369         }
 370
 371         /**
 372          * Process the given story resource into a fully filled {@link Story}
 373          * object.
 374          *
 375          * @param pg
 376          *            the optional progress reporter
 377          *
 378          * @return the {@link Story}, never NULL
 379          *
 380          * @throws IOException
 381          *             in case of I/O error
 382          */
 383         // TODO: ADD final when BasicSupport_Deprecated is gone
 384         public Story process(Progress pg) throws IOException {
 385                 setCurrentReferer(source);
 386                 login();
 387                 sourceNode = loadDocument(source);
 388
 389                 try {
 390                         Story story = doProcess(pg);
 391
 392                         // Check for "no chapters" stories
 393                         if (story.getChapters().isEmpty()
 394                                         && story.getMeta().getResume() != null
 395                                         && !story.getMeta().getResume().getParagraphs().isEmpty()) {
 396                                 Chapter resume = story.getMeta().getResume();
 397                                 resume.setName("");
 398                                 resume.setNumber(1);
 399                                 story.getChapters().add(resume);
 400                                 story.getMeta().setWords(resume.getWords());
 401
 402                                 String descChapterName = Instance.getInstance().getTrans()
 403                                                 .getString(StringId.DESCRIPTION);
 404                                 resume = new Chapter(0, descChapterName);
 405                                 story.getMeta().setResume(resume);
 406                         }
 407
 408                         return story;
 409                 } finally {
 410                         close();
 411                 }
 412         }
 413
 414         /**
 415          * Actual processing step, without the calls to other methods.
 416          * <p>
 417          * Will convert the story resource into a fully filled {@link Story} object.
 418          *
 419          * @param pg
 420          *            the optional progress reporter
 421          *
 422          * @return the {@link Story}, never NULL
 423          *
 424          * @throws IOException
 425          *             in case of I/O error
 426          */
 427         protected Story doProcess(Progress pg) throws IOException {
 428                 if (pg == null) {
 429                         pg = new Progress();
 430                 } else {
 431                         pg.setMinMax(0, 100);
 432                 }
 433
 434                 pg.setName("Initialising");
 435
 436                 pg.setProgress(1);
 437                 Progress pgMeta = new Progress();
 438                 pg.addProgress(pgMeta, 10);
 439                 Story story = processMeta(true, pgMeta);
 440                 pgMeta.done(); // 10%
 441                 pg.put("meta", story.getMeta());
 442
 443                 Progress pgGetChapters = new Progress();
 444                 pg.addProgress(pgGetChapters, 10);
 445                 story.setChapters(new ArrayList<Chapter>());
 446                 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
 447                 pgGetChapters.done(); // 20%
 448
 449                 if (chapters != null) {
 450                         Progress pgChaps = new Progress("Extracting chapters", 0,
 451                                         chapters.size() * 300);
 452                         pg.addProgress(pgChaps, 80);
 453
 454                         long words = 0;
 455                         int i = 1;
 456                         for (Entry<String, URL> chap : chapters) {
 457                                 pgChaps.setName("Extracting chapter " + i);
 458                                 URL chapUrl = chap.getValue();
 459                                 String chapName = chap.getKey();
 460                                 if (chapUrl != null) {
 461                                         setCurrentReferer(chapUrl);
 462                                 }
 463
 464                                 pgChaps.setProgress(i * 100);
 465                                 Progress pgGetChapterContent = new Progress();
 466                                 Progress pgMakeChapter = new Progress();
 467                                 pgChaps.addProgress(pgGetChapterContent, 100);
 468                                 pgChaps.addProgress(pgMakeChapter, 100);
 469
 470                                 String content = getChapterContent(chapUrl, i,
 471                                                 pgGetChapterContent);
 472                                 pgGetChapterContent.done();
 473                                 Chapter cc = bsPara.makeChapter(this, chapUrl, i,
 474                                                 chapName, content, isHtml(), pgMakeChapter);
 475                                 pgMakeChapter.done();
 476
 477                                 words += cc.getWords();
 478                                 story.getChapters().add(cc);
 479
 480                                 i++;
 481                         }
 482
 483                         story.getMeta().setWords(words);
 484
 485                         pgChaps.setName("Extracting chapters");
 486                         pgChaps.done();
 487                 }
 488
 489                 pg.done();
 490
 491                 return story;
 492         }
 493
 494         /**
 495          * Create a chapter from the given data.
 496          *
 497          * @param source
 498          *            the source URL for this content, which can be used to try and
 499          *            find images if images are present in the format [image-url]
 500          * @param number
 501          *            the chapter number (0 = description)
 502          * @param name
 503          *            the chapter name
 504          * @param content
 505          *            the content of the chapter
 506          *
 507          * @return the {@link Chapter}, never NULL
 508          *
 509          * @throws IOException
 510          *             in case of I/O error
 511          */
 512         public Chapter makeChapter(URL source, int number, String name,
 513                         String content) throws IOException {
 514                 return bsPara.makeChapter(this, source, number, name,
 515                                 content, isHtml(), null);
 516         }
 517
 518         /**
 519          * Return a {@link BasicSupport} implementation supporting the given
 520          * resource if possible.
 521          *
 522          * @param url
 523          *            the story resource
 524          *
 525          * @return an implementation that supports it, or NULL
 526          */
 527         public static BasicSupport getSupport(URL url) {
 528                 if (url == null) {
 529                         return null;
 530                 }
 531
 532                 // TEXT and INFO_TEXT always support files (not URLs though)
 533                 for (SupportType type : SupportType.values()) {
 534                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
 535                                 BasicSupport support = getSupport(type, url);
 536                                 if (support != null && support.supports(url)) {
 537                                         return support;
 538                                 }
 539                         }
 540                 }
 541
 542                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
 543                                 SupportType.TEXT }) {
 544                         BasicSupport support = getSupport(type, url);
 545                         if (support != null && support.supports(url)) {
 546                                 return support;
 547                         }
 548                 }
 549
 550                 return null;
 551         }
 552
 553         /**
 554          * Return a {@link BasicSupport} implementation supporting the given type.
 555          *
 556          * @param type
 557          *            the type, must not be NULL
 558          * @param url
 559          *            the {@link URL} to support (can be NULL to get an
 560          *            "abstract support"; if not NULL, will be used as the source
 561          *            URL)
 562          *
 563          * @return an implementation that supports it, or NULL
 564          */
 565         public static BasicSupport getSupport(SupportType type, URL url) {
 566                 BasicSupport support = null;
 567
 568                 switch (type) {
 569                 case EPUB:
 570                         support = new Epub();
 571                         break;
 572                 case INFO_TEXT:
 573                         support = new InfoText();
 574                         break;
 575                 case FIMFICTION:
 576                         try {
 577                                 // Can fail if no client key or NO in options
 578                                 support = new FimfictionApi();
 579                         } catch (IOException e) {
 580                                 support = new Fimfiction();
 581                         }
 582                         break;
 583                 case FANFICTION:
 584                         support = new Fanfiction();
 585                         break;
 586                 case TEXT:
 587                         support = new Text();
 588                         break;
 589                 case MANGAHUB:
 590                         support = new MangaHub();
 591                         break;
 592                 case E621:
 593                         support = new E621();
 594                         break;
 595                 case YIFFSTAR:
 596                         support = new YiffStar();
 597                         break;
 598                 case E_HENTAI:
 599                         support = new EHentai();
 600                         break;
 601                 case MANGA_LEL:
 602                         support = new MangaLel();
 603                         break;
 604                 case CBZ:
 605                         support = new Cbz();
 606                         break;
 607                 case HTML:
 608                         support = new Html();
 609                         break;
 610                 }
 611
 612                 if (support != null) {
 613                         support.setType(type);
 614                         support.source = support.getCanonicalUrl(url);
 615                 }
 616
 617                 return support;
 618         }
 619 }