fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.URL;
   6 import java.util.ArrayList;
   7 import java.util.Date;
   8 import java.util.HashMap;
   9 import java.util.List;
  10 import java.util.Map;
  11 import java.util.Map.Entry;
  12
  13 import org.jsoup.helper.DataUtil;
  14 import org.jsoup.nodes.Document;
  15 import org.jsoup.nodes.Element;
  16 import org.jsoup.nodes.Node;
  17
  18 import be.nikiroo.fanfix.Instance;
  19 import be.nikiroo.fanfix.bundles.StringId;
  20 import be.nikiroo.fanfix.data.Chapter;
  21 import be.nikiroo.fanfix.data.MetaData;
  22 import be.nikiroo.fanfix.data.Story;
  23 import be.nikiroo.utils.Progress;
  24 import be.nikiroo.utils.StringUtils;
  25
  26 /**
  27  * This class is the base class used by the other support classes. It can be
  28  * used outside of this package, and have static method that you can use to get
  29  * access to the correct support class.
  30  * <p>
  31  * It will be used with 'resources' (usually web pages or files).
  32  *
  33  * @author niki
  34  */
  35 public abstract class BasicSupport {
  36         private Document sourceNode;
  37         private URL source;
  38         private SupportType type;
  39         private URL currentReferer; // with only one 'r', as in 'HTTP'...
  40
  41         /**
  42          * The name of this support class.
  43          *
  44          * @return the name
  45          */
  46         protected abstract String getSourceName();
  47
  48         /**
  49          * Check if the given resource is supported by this {@link BasicSupport}.
  50          *
  51          * @param url
  52          *            the resource to check for
  53          *
  54          * @return TRUE if it is
  55          */
  56         protected abstract boolean supports(URL url);
  57
  58         /**
  59          * Return TRUE if the support will return HTML encoded content values for
  60          * the chapters content.
  61          *
  62          * @return TRUE for HTML
  63          */
  64         protected abstract boolean isHtml();
  65
  66         /**
  67          * Return the {@link MetaData} of this story.
  68          *
  69          * @return the associated {@link MetaData}, never NULL
  70          *
  71          * @throws IOException
  72          *             in case of I/O error
  73          */
  74         protected abstract MetaData getMeta() throws IOException;
  75
  76         /**
  77          * Return the story description.
  78          *
  79          * @return the description
  80          *
  81          * @throws IOException
  82          *             in case of I/O error
  83          */
  84         protected abstract String getDesc() throws IOException;
  85
  86         /**
  87          * Return the list of chapters (name and resource). *
  88          * <p>
  89          * Can be NULL if this {@link BasicSupport} do no use chapters.
  90          *
  91          * @param pg
  92          *            the optional progress reporter
  93          *
  94          * @return the chapters or NULL
  95          *
  96          * @throws IOException
  97          *             in case of I/O error
  98          */
  99         protected abstract List<Entry<String, URL>> getChapters(Progress pg)
 100                         throws IOException;
 101
 102         /**
 103          * Return the content of the chapter (possibly HTML encoded, if
 104          * {@link BasicSupport#isHtml()} is TRUE).
 105          *
 106          * @param chapUrl
 107          *            the chapter {@link URL}
 108          * @param number
 109          *            the chapter number
 110          * @param pg
 111          *            the optional progress reporter
 112          *
 113          * @return the content
 114          *
 115          * @throws IOException
 116          *             in case of I/O error
 117          */
 118         protected abstract String getChapterContent(URL chapUrl, int number,
 119                         Progress pg) throws IOException;
 120
 121         /**
 122          * Return the list of cookies (values included) that must be used to
 123          * correctly fetch the resources.
 124          * <p>
 125          * You are expected to call the super method implementation if you override
 126          * it.
 127          *
 128          * @return the cookies
 129          */
 130         public Map<String, String> getCookies() {
 131                 return new HashMap<String, String>();
 132         }
 133
 134         /**
 135          * OAuth authorisation (aka, "bearer XXXXXXX").
 136          *
 137          * @return the OAuth string
 138          */
 139         public String getOAuth() {
 140                 return null;
 141         }
 142
 143         /**
 144          * Return the canonical form of the main {@link URL}.
 145          *
 146          * @param source
 147          *            the source {@link URL}, which can be NULL
 148          *
 149          * @return the canonical form of this {@link URL} or NULL if the source was
 150          *         NULL
 151          */
 152         protected URL getCanonicalUrl(URL source) {
 153                 return source;
 154         }
 155
 156         /**
 157          * The main {@link Node} for this {@link Story}.
 158          *
 159          * @return the node
 160          */
 161         protected Element getSourceNode() {
 162                 return sourceNode;
 163         }
 164
 165         /**
 166          * The main {@link URL} for this {@link Story}.
 167          *
 168          * @return the URL
 169          */
 170         protected URL getSource() {
 171                 return source;
 172         }
 173
 174         /**
 175          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 176          * the current {@link URL} we work on.
 177          *
 178          * @return the referer
 179          */
 180         public URL getCurrentReferer() {
 181                 return currentReferer;
 182         }
 183
 184         /**
 185          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 186          * the current {@link URL} we work on.
 187          *
 188          * @param currentReferer
 189          *            the new referer
 190          */
 191         protected void setCurrentReferer(URL currentReferer) {
 192                 this.currentReferer = currentReferer;
 193         }
 194
 195         /**
 196          * The support type.
 197          *
 198          * @return the type
 199          */
 200         public SupportType getType() {
 201                 return type;
 202         }
 203
 204         /**
 205          * The support type.
 206          *
 207          * @param type
 208          *            the new type
 209          */
 210         protected void setType(SupportType type) {
 211                 this.type = type;
 212         }
 213
 214         /**
 215          * Open an input link that will be used for the support.
 216          * <p>
 217          * Can return NULL, in which case you are supposed to work without a source
 218          * node.
 219          *
 220          * @param source
 221          *            the source {@link URL}
 222          *
 223          * @return the {@link InputStream}
 224          *
 225          * @throws IOException
 226          *             in case of I/O error
 227          */
 228         protected Document loadDocument(URL source) throws IOException {
 229                 String url = getCanonicalUrl(source).toString();
 230                 return DataUtil.load(Instance.getCache().open(source, this, false),
 231                                 "UTF-8", url.toString());
 232         }
 233
 234         /**
 235          * Log into the support (can be a no-op depending upon the support).
 236          *
 237          * @throws IOException
 238          *             in case of I/O error
 239          */
 240         protected void login() throws IOException {
 241         }
 242
 243         /**
 244          * Now that we have processed the {@link Story}, close the resources if any.
 245          */
 246         protected void close() {
 247                 setCurrentReferer(null);
 248         }
 249
 250         /**
 251          * Process the given story resource into a partially filled {@link Story}
 252          * object containing the name and metadata, except for the description.
 253          *
 254          * @return the {@link Story}
 255          *
 256          * @throws IOException
 257          *             in case of I/O error
 258          */
 259         public final Story processMeta() throws IOException {
 260                 Story story = null;
 261
 262                 try {
 263                         story = processMeta(false, null);
 264                 } finally {
 265                         close();
 266                 }
 267
 268                 return story;
 269         }
 270
 271         /**
 272          * Process the given story resource into a partially filled {@link Story}
 273          * object containing the name and metadata.
 274          *
 275          * @param getDesc
 276          *            retrieve the description of the story, or not
 277          * @param pg
 278          *            the optional progress reporter
 279          *
 280          * @return the {@link Story}, never NULL
 281          *
 282          * @throws IOException
 283          *             in case of I/O error
 284          */
 285         protected Story processMeta(boolean getDesc, Progress pg)
 286                         throws IOException {
 287                 if (pg == null) {
 288                         pg = new Progress();
 289                 } else {
 290                         pg.setMinMax(0, 100);
 291                 }
 292
 293                 pg.setProgress(30);
 294
 295                 Story story = new Story();
 296                 MetaData meta = getMeta();
 297                 if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
 298                         meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
 299                 }
 300                 story.setMeta(meta);
 301
 302                 pg.setProgress(50);
 303
 304                 if (meta.getCover() == null) {
 305                         meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject()));
 306                 }
 307
 308                 pg.setProgress(60);
 309
 310                 if (getDesc) {
 311                         String descChapterName = Instance.getTrans().getString(
 312                                         StringId.DESCRIPTION);
 313                         story.getMeta().setResume(
 314                                         BasicSupportPara.makeChapter(this, source, 0,
 315                                                         descChapterName, //
 316                                                         getDesc(), isHtml(), null));
 317                 }
 318
 319                 pg.setProgress(100);
 320                 return story;
 321         }
 322
 323         /**
 324          * Actual processing step, without the calls to other methods.
 325          * <p>
 326          * Will convert the story resource into a fully filled {@link Story} object.
 327          *
 328          * @param pg
 329          *            the optional progress reporter
 330          *
 331          * @return the {@link Story}, never NULL
 332          *
 333          * @throws IOException
 334          *             in case of I/O error
 335          */
 336         // TODO: add final
 337         public Story process(Progress pg) throws IOException {
 338                 setCurrentReferer(source);
 339                 login();
 340                 sourceNode = loadDocument(source);
 341
 342                 try {
 343                         return doProcess(pg);
 344                 } finally {
 345                         close();
 346                 }
 347         }
 348
 349         /**
 350          * Process the given story resource into a fully filled {@link Story}
 351          * object.
 352          *
 353          * @param pg
 354          *            the optional progress reporter
 355          *
 356          * @return the {@link Story}, never NULL
 357          *
 358          * @throws IOException
 359          *             in case of I/O error
 360          */
 361         public Story doProcess(Progress pg) throws IOException {
 362                 if (pg == null) {
 363                         pg = new Progress();
 364                 } else {
 365                         pg.setMinMax(0, 100);
 366                 }
 367
 368                 pg.setProgress(1);
 369                 Progress pgMeta = new Progress();
 370                 pg.addProgress(pgMeta, 10);
 371                 Story story = processMeta(true, pgMeta);
 372                 if (!pgMeta.isDone()) {
 373                         pgMeta.setProgress(pgMeta.getMax()); // 10%
 374                 }
 375
 376                 pg.setName("Retrieving " + story.getMeta().getTitle());
 377
 378                 Progress pgGetChapters = new Progress();
 379                 pg.addProgress(pgGetChapters, 10);
 380                 story.setChapters(new ArrayList<Chapter>());
 381                 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
 382                 if (!pgGetChapters.isDone()) {
 383                         pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
 384                 }
 385
 386                 if (chapters != null) {
 387                         Progress pgChaps = new Progress("Extracting chapters", 0,
 388                                         chapters.size() * 300);
 389                         pg.addProgress(pgChaps, 80);
 390
 391                         long words = 0;
 392                         int i = 1;
 393                         for (Entry<String, URL> chap : chapters) {
 394                                 pgChaps.setName("Extracting chapter " + i);
 395                                 URL chapUrl = chap.getValue();
 396                                 String chapName = chap.getKey();
 397                                 if (chapUrl != null) {
 398                                         setCurrentReferer(chapUrl);
 399                                 }
 400
 401                                 pgChaps.setProgress(i * 100);
 402                                 Progress pgGetChapterContent = new Progress();
 403                                 Progress pgMakeChapter = new Progress();
 404                                 pgChaps.addProgress(pgGetChapterContent, 100);
 405                                 pgChaps.addProgress(pgMakeChapter, 100);
 406
 407                                 String content = getChapterContent(chapUrl, i,
 408                                                 pgGetChapterContent);
 409                                 if (!pgGetChapterContent.isDone()) {
 410                                         pgGetChapterContent.setProgress(pgGetChapterContent
 411                                                         .getMax());
 412                                 }
 413
 414                                 Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i,
 415                                                 chapName, content, isHtml(), pgMakeChapter);
 416                                 if (!pgMakeChapter.isDone()) {
 417                                         pgMakeChapter.setProgress(pgMakeChapter.getMax());
 418                                 }
 419
 420                                 words += cc.getWords();
 421                                 story.getChapters().add(cc);
 422                                 story.getMeta().setWords(words);
 423
 424                                 i++;
 425                         }
 426
 427                         pgChaps.setName("Extracting chapters");
 428                 } else {
 429                         pg.setProgress(80);
 430                 }
 431
 432                 return story;
 433         }
 434
 435         /**
 436          * Return a {@link BasicSupport} implementation supporting the given
 437          * resource if possible.
 438          *
 439          * @param url
 440          *            the story resource
 441          *
 442          * @return an implementation that supports it, or NULL
 443          */
 444         public static BasicSupport getSupport(URL url) {
 445                 if (url == null) {
 446                         return null;
 447                 }
 448
 449                 // TEXT and INFO_TEXT always support files (not URLs though)
 450                 for (SupportType type : SupportType.values()) {
 451                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
 452                                 BasicSupport support = getSupport(type, url);
 453                                 if (support != null && support.supports(url)) {
 454                                         return support;
 455                                 }
 456                         }
 457                 }
 458
 459                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
 460                                 SupportType.TEXT }) {
 461                         BasicSupport support = getSupport(type, url);
 462                         if (support != null && support.supports(url)) {
 463                                 return support;
 464                         }
 465                 }
 466
 467                 return null;
 468         }
 469
 470         /**
 471          * Return a {@link BasicSupport} implementation supporting the given type.
 472          *
 473          * @param type
 474          *            the type
 475          * @param url
 476          *            the {@link URL} to support (can be NULL to get an
 477          *            "abstract support")
 478          *
 479          * @return an implementation that supports it, or NULL
 480          */
 481         public static BasicSupport getSupport(SupportType type, URL url) {
 482                 BasicSupport support = null;
 483
 484                 switch (type) {
 485                 case EPUB:
 486                         support = new Epub();
 487                         break;
 488                 case INFO_TEXT:
 489                         support = new InfoText();
 490                         break;
 491                 case FIMFICTION:
 492                         try {
 493                                 // Can fail if no client key or NO in options
 494                                 support = new FimfictionApi();
 495                         } catch (IOException e) {
 496                                 support = new Fimfiction();
 497                         }
 498                         break;
 499                 case FANFICTION:
 500                         support = new Fanfiction();
 501                         break;
 502                 case TEXT:
 503                         support = new Text();
 504                         break;
 505                 case MANGAFOX:
 506                         support = new MangaFox();
 507                         break;
 508                 case E621:
 509                         support = new E621();
 510                         break;
 511                 case YIFFSTAR:
 512                         support = new YiffStar();
 513                         break;
 514                 case E_HENTAI:
 515                         support = new EHentai();
 516                         break;
 517                 case MANGA_LEL:
 518                         support = new MangaLel();
 519                         break;
 520                 case CBZ:
 521                         support = new Cbz();
 522                         break;
 523                 case HTML:
 524                         support = new Html();
 525                         break;
 526                 }
 527
 528                 if (support != null) {
 529                         support.setType(type);
 530                         support.source = support.getCanonicalUrl(url);
 531                 }
 532
 533                 return support;
 534         }
 535 }